# coding: utf-8
#
# Copyright 2018 Yaman Güçlü
import os
import warnings
import numpy as np
from types import MappingProxyType
from scipy.sparse import coo_matrix, diags as sp_diags
from psydac.ddm.mpi import mpi as MPI
from psydac.linalg.basic import VectorSpace, Vector, LinearOperator
from psydac.ddm.cart import find_mpi_type, CartDecomposition, InterfaceCartDecomposition
from psydac.ddm.utilities import get_data_exchanger
from psydac.api.settings import PSYDAC_BACKENDS
from psydac.linalg.kernels.axpy_kernels import axpy_1d, axpy_2d, axpy_3d
from psydac.linalg.kernels.inner_kernels import inner_1d, inner_2d, inner_3d
from psydac.linalg.kernels.matvec_kernels import matvec_1d, matvec_2d, matvec_3d
from psydac.linalg.kernels.transpose_kernels import transpose_1d, transpose_2d, transpose_3d
from psydac.linalg.kernels.transpose_kernels import interface_transpose_1d, interface_transpose_2d, interface_transpose_3d
from psydac.linalg.kernels.stencil2coo_kernels import stencil2coo_1d_F, stencil2coo_2d_F, stencil2coo_3d_F
from psydac.linalg.kernels.stencil2coo_kernels import stencil2coo_1d_C, stencil2coo_2d_C, stencil2coo_3d_C
__all__ = (
'StencilVectorSpace',
'StencilVector',
'StencilMatrix',
'StencilInterfaceMatrix'
)
#===============================================================================
# Dictionary used to select correct kernel functions based on dimensionality
kernels = {
'axpy' : (None, axpy_1d, axpy_2d, axpy_3d),
'inner' : (None, inner_1d, inner_2d, inner_3d),
'matvec': (None, matvec_1d, matvec_2d, matvec_3d),
'transpose': (None, transpose_1d, transpose_2d, transpose_3d),
'interface_transpose': (None, interface_transpose_1d, interface_transpose_2d, interface_transpose_3d),
'stencil2coo': {'F': (None, stencil2coo_1d_F, stencil2coo_2d_F, stencil2coo_3d_F),
'C': (None, stencil2coo_1d_C, stencil2coo_2d_C, stencil2coo_3d_C)}
}
#===============================================================================
def compute_diag_len(pads, shifts_domain, shifts_codomain, return_padding=False):
"""
Compute the diagonal length and the padding of the stencil matrix for each direction,
using the shifts of the domain and the codomain.
Parameters
----------
pads : tuple-like (int)
Padding along each direction.
shifts_domain : tuple_like (int)
Shifts of the domain along each direction.
shifts_codomain : tuple_like (int)
Shifts of the codomain along each direction.
return_padding : bool
Return the new padding if True.
Returns
-------
n : (int)
Diagonal length of the stencil matrix.
ep : (int)
Padding that constitutes the starting index of the non zero elements.
"""
n = ((np.ceil((pads+1)/shifts_codomain)-1)*shifts_domain).astype('int')
ep = -np.minimum(0, n-pads)
n = n + ep + pads + 1
if return_padding:
return n.astype('int'), ep.astype('int')
else:
return n.astype('int')
#===============================================================================
class StencilVectorSpace(VectorSpace):
"""
Vector space for n-dimensional stencil format. Two different initializations
are possible:
- serial : StencilVectorSpace(npts, pads, periods, shifts=None, starts=None, ends=None, dtype=float)
- parallel: StencilVectorSpace(cart, dtype=float)
Parameters
----------
npts : tuple-like (int)
Number of entries along each direction
(= global dimensions of vector space).
pads : tuple-like (int)
Padding p along each direction needed for the ghost regions.
periods : tuple-like (bool)
Periodicity along each direction.
shifts : tuple-like (int)
shift m of the coefficients in each direction.
starts : tuple-like (int)
Index of the first coefficient local to the space in each direction.
ends : tuple-like (int)
Index of the last coefficient local to the space in each direction.
dtype : type
Type of scalar entries.
cart : psydac.ddm.cart.CartDecomposition
Tensor-product grid decomposition according to MPI Cartesian topology.
"""
def __init__(self, cart, dtype=float):
assert isinstance(cart, (CartDecomposition, InterfaceCartDecomposition))
# Sequential attributes
self._parallel = cart.is_parallel
self._cart = cart
self._ndim = cart._ndims
self._npts = cart.npts
self._pads = cart.pads
self._periods = cart.periods
self._shifts = cart.shifts
self._dtype = dtype
self._starts = cart.starts
self._ends = cart.ends
# The shape of the allocated numpy array
self._shape = cart.shape
self._parent_starts = cart.parent_starts
self._parent_ends = cart.parent_ends
self._mpi_type = find_mpi_type(dtype)
# The dictionary follows the structure {(axis, ext): StencilVectorSpace()}
# where axis and ext represent the boundary shared by two patches
self._interfaces = {}
self._interfaces_readonly = MappingProxyType(self._interfaces)
# Parallel attributes
if cart.is_parallel and not cart.is_comm_null:
self._mpi_type = find_mpi_type(dtype)
if isinstance(cart, InterfaceCartDecomposition):
# TODO : Check if this line really change the ._shape
self._shape = cart.get_interface_communication_infos(cart.axis)['gbuf_recv_shape'][0]
else:
self._synchronizer = get_data_exchanger(cart, dtype , assembly=True, blocking=False)
# Select kernel for AXPY operation
if self._ndim in [1, 2, 3]:
self._axpy_func = kernels['axpy'][self._ndim]
else:
self._axpy_func = self._axpy_python
self._axpy_work = self.zeros() # work array
# Select kernel for inner product
if self._ndim in [1, 2, 3]:
self._inner_func = kernels['inner'][self._ndim]
else:
self._inner_func = self._inner_python
# Constant arguments for inner product: total number of ghost cells
self._inner_consts = tuple(np.int64(p * s) for p, s in zip(self._pads, self._shifts))
# TODO [YG, 06.09.2023]: print warning if pure Python functions are used
#--------------------------------------
# Pure Python methods for backup
#--------------------------------------
def _axpy_python(self, a, x, y):
w = self._axpy_work
x.copy(out=w) # w <- x
w *= a # w <- a * x
y += w # y <- a * x + y
@staticmethod
def _inner_python(v1, v2, nghost):
index = tuple(slice(ng, -ng) for ng in nghost)
return np.vdot(v1[index].flat, v2[index].flat)
#--------------------------------------
# Abstract interface
#--------------------------------------
@property
def dimension(self):
""" The dimension of a vector space V is the cardinality
(i.e. the number of vectors) of a basis of V over its base field.
"""
return np.prod(self._npts)
# ...
@property
def dtype(self):
return self._dtype
# ...
def zeros(self):
"""
Get a copy of the null element of the StencilVectorSpace V.
Returns
-------
null : StencilVector
A new vector object with all components equal to zero.
"""
return StencilVector(self)
#...
def inner(self, x, y):
"""
Evaluate the inner vector product between two vectors of this space V.
If the field of V is real, compute the classical scalar product.
If the field of V is complex, compute the classical sesquilinear
product with linearity on the second vector.
TODO [YG 01.05.2025]: Currently, the first vector is conjugated. We
want to reverse this behavior in order to align with the convention
of FEniCS.
Parameters
----------
x : Vector
The first vector in the scalar product. In the case of a complex
field, the inner product is antilinear w.r.t. this vector (hence
this vector is conjugated).
y : Vector
The second vector in the scalar product. The inner product is
linear w.r.t. this vector.
Returns
-------
float | complex
The scalar product of the two vectors. Note that inner(x, x) is
a non-negative real number which is zero if and only if x = 0.
"""
assert isinstance(x, StencilVector)
assert isinstance(y, StencilVector)
assert x.space is self
assert y.space is self
inner_func = self._inner_func
inner_args = (x._data, y._data, *self._inner_consts)
if self.parallel:
# Sometimes in the parallel case, we can get an empty vector that breaks our kernel
x._dot_send_data[0] = 0 if x._data.shape[0] == 0 else inner_func(*inner_args)
self.cart.global_comm.Allreduce((x._dot_send_data, self.mpi_type),
(x._dot_recv_data, self.mpi_type),
op=MPI.SUM )
return x._dot_recv_data[0]
else:
return inner_func(*inner_args)
# ...
def axpy(self, a, x, y):
"""
Increment the vector y with the a-scaled vector x, i.e. y = a * x + y,
provided that x and y belong to the same vector space V (self).
The scalar value a may be real or complex, depending on the field of V.
Parameters
----------
a : scalar
The scaling coefficient needed for the operation.
x : StencilVector
The vector which is not modified by this function.
y : StencilVector
The vector modified by this function (incremented by a * x).
"""
assert isinstance(x, StencilVector)
assert isinstance(y, StencilVector)
assert x._space is self
assert y._space is self
if self.dtype == complex:
a = complex(a)
else:
if isinstance(a, complex):
raise TypeError('A complex scalar was given in a real case')
else:
a = float(a)
self._axpy_func(a, x._data, y._data)
for axis, ext in self.interfaces:
self._axpy_func(a, x._interface_data[axis, ext], y._interface_data[axis, ext])
x._sync = x._sync and y._sync
#--------------------------------------
# Other properties/methods
#--------------------------------------
@property
def mpi_type(self):
return self._mpi_type
@property
def shape(self):
return self._shape
@property
def parallel(self):
return self._parallel
# ...
@property
def cart(self):
return self._cart
# ...
@property
def npts(self):
return self._npts
# ...
@property
def starts(self):
return self._starts
# ...
@property
def ends(self):
return self._ends
# ...
@property
def parent_starts(self):
return self._parent_starts
# ...
@property
def parent_ends(self):
return self._parent_ends
# ...
@property
def pads(self):
return self._pads
# ...
@property
def periods(self):
return self._periods
# ...
@property
def shifts(self):
return self._shifts
# ...
@property
def ndim(self):
return self._ndim
@property
def interfaces(self):
return self._interfaces_readonly
def set_interface(self, axis, ext, cart):
"""
Set the interface space along a given axis and extremity.
Parameters
----------
axis : int
The axis of the new Interface space.
ext: {-1, 1}
The extremity of the new Interface space.
cart: CartDecomposition
The cart of the new space.
"""
assert int(ext) in [-1, 1]
assert isinstance(cart, (CartDecomposition, InterfaceCartDecomposition))
if cart.is_comm_null:
return
# Create the interface space in the parallel case using the new cart
if isinstance(cart, InterfaceCartDecomposition):
# Case where the patches that share the interface are owned by different intra-communicators
space = StencilVectorSpace(cart, dtype=self.dtype)
self._interfaces[axis, ext] = space
else:
# Case where the patches that share the interface are owned by the same intra-communicator
if self.parent_ends[axis] is not None:
diff = min(1,self.parent_ends[axis]-self.ends[axis])
else:
diff = 0
starts = list(cart._starts)
ends = list(cart._ends)
parent_starts = list(cart._parent_starts)
parent_ends = list(cart._parent_ends)
if ext == 1:
starts[axis] = self.ends[axis]-self.pads[axis]+diff
if parent_starts[axis] is not None:
parent_starts[axis] = parent_ends[axis]-self.pads[axis]
else:
ends[axis] = self.pads[axis]-diff
if parent_ends[axis] is not None:
parent_ends[axis] = self.pads[axis]
cart = cart.change_starts_ends(tuple(starts), tuple(ends), tuple(parent_starts), tuple(parent_ends))
#TODO Check if we create object from it, otherwise its only purpose is to store some parameters which is innefficient
space = StencilVectorSpace(cart, self.dtype)
self._interfaces[axis, ext] = space
#===============================================================================
[docs]
class StencilVector(Vector):
"""
Vector in n-dimensional stencil format.
Parameters
----------
V : psydac.linalg.stencil.StencilVectorSpace
Space to which the new vector belongs.
"""
def __init__(self, V):
assert isinstance(V, StencilVectorSpace)
self._space = V
self._sizes = V.shape
self._ndim = len(V.npts)
self._data = np.zeros(V.shape, dtype=V.dtype)
self._dot_send_data = np.zeros((1,), dtype=V.dtype)
self._dot_recv_data = np.zeros((1,), dtype=V.dtype)
self._interface_data = {}
self._requests = None
# allocate data for the boundary that shares an interface
for axis, ext in V.interfaces:
self._interface_data[axis, ext] = np.zeros(V.interfaces[axis, ext].shape, dtype=V.dtype)
#prepare communications
if V.cart.is_parallel and not V.cart.is_comm_null and isinstance(V.cart, CartDecomposition):
self._requests = V._synchronizer.prepare_communications(self._data)
# TODO: distinguish between different directions
self._sync = False
#...
def __del__(self):
# Release memory of persistent MPI communication channels
if self._requests:
for request in self._requests:
request.Free()
#--------------------------------------
# Abstract interface
#--------------------------------------
@property
def space(self):
return self._space
# ...
[docs]
def toarray(self, *, order='C', with_pads=False):
"""
Return a numpy 1D array corresponding to the given StencilVector,
with or without pads.
Parameters
----------
with_pads : bool
If True, include pads in output array (ignored in serial case).
order: {'C','F'}
Memory representation of the data ‘C’ for row-major ordering (C-style), ‘F’ column-major ordering (Fortran-style).
Returns
-------
array : numpy.ndarray
A copy of the data array collapsed into one dimension.
"""
# In parallel case, call different functions based on 'with_pads' flag
if self.space.parallel:
if with_pads:
return self._toarray_parallel_with_pads(order=order)
else:
return self._toarray_parallel_no_pads(order=order)
# In serial case, ignore 'with_pads' flag
return self.toarray_local(order=order)
#...
[docs]
def copy(self, out=None):
if self is out:
return self
w = out or StencilVector( self._space )
np.copyto(w._data, self._data, casting='no')
for axis, ext in self._space.interfaces:
np.copyto(w._interface_data[axis, ext], self._interface_data[axis, ext], casting='no')
w._sync = self._sync
return w
#...
[docs]
def conjugate(self, out=None):
if out is not None:
assert isinstance(out, StencilVector)
assert out.space is self.space
else:
out = StencilVector(self.space)
np.conjugate(self._data, out=out._data, casting='no')
for axis, ext in self._space.interfaces:
np.conjugate(self._interface_data[axis, ext], out=out._interface_data[axis, ext], casting='no')
out._sync = self._sync
return out
#...
def __neg__(self):
w = StencilVector( self._space )
np.negative(self._data, out=w._data)
for axis, ext in self._space.interfaces:
np.negative(self._interface_data[axis, ext], out=w._interface_data[axis, ext])
w._sync = self._sync
return w
#...
def __mul__(self, a):
w = StencilVector( self._space )
np.multiply(self._data, a, out=w._data)
for axis, ext in self._space.interfaces:
np.multiply(self._interface_data[axis, ext], a, out=w._interface_data[axis, ext])
w._sync = self._sync
return w
#...
def __add__(self, v):
assert isinstance( v, StencilVector )
assert v._space is self._space
w = StencilVector( self._space )
np.add(self._data, v._data, out=w._data)
for axis, ext in self._space.interfaces:
np.add(self._interface_data[axis, ext], v._interface_data[axis, ext], out=w._interface_data[axis, ext])
w._sync = self._sync and v._sync
return w
#...
def __sub__(self, v):
assert isinstance( v, StencilVector )
assert v._space is self._space
w = StencilVector( self._space )
np.subtract(self._data, v._data, out=w._data)
for axis, ext in self._space.interfaces:
np.subtract(self._interface_data[axis, ext], v._interface_data[axis, ext], out=w._interface_data[axis, ext])
w._sync = self._sync and v._sync
return w
#...
def __imul__(self, a):
self._data *= a
for axis, ext in self._space.interfaces:
self._interface_data[axis, ext] *= a
return self
#...
def __iadd__(self, v):
assert isinstance( v, StencilVector )
assert v._space is self._space
self._data += v._data
for axis, ext in self._space.interfaces:
self._interface_data[axis, ext] += v._interface_data[axis, ext]
self._sync = v._sync and self._sync
return self
#...
def __isub__(self, v):
assert isinstance( v, StencilVector )
assert v._space is self._space
self._data -= v._data
for axis, ext in self._space.interfaces:
self._interface_data[axis, ext] -= v._interface_data[axis, ext]
self._sync = v._sync and self._sync
return self
#--------------------------------------
# Other properties/methods
#--------------------------------------
@property
def starts(self):
return self._space.starts
# ...
@property
def ends(self):
return self._space.ends
# ...
@property
def pads(self):
return self._space.pads
# ...
def __str__(self):
txt = '\n'
txt += '> starts :: {starts}\n'.format( starts= self.starts )
txt += '> ends :: {ends}\n' .format( ends = self.ends )
txt += '> pads :: {pads}\n' .format( pads = self.pads )
txt += '> data :: {data}\n' .format( data = self._data )
txt += '> sync :: {sync}\n' .format( sync = self._sync )
return txt
# ...
[docs]
def toarray_local(self , *, order='C'):
""" return the local array without the padding"""
idx = tuple( slice(m*p,-m*p) if p != 0 else slice(0, None) for p,m in zip(self.pads, self.space.shifts) )
return self._data[idx].flatten( order=order)
# ...
def _toarray_parallel_no_pads(self, order='C'):
a = np.zeros( self.space.npts, self.dtype )
idx_from = tuple( slice(m*p,-m*p) if p != 0 else slice(0, None) for p,m in zip(self.pads, self.space.shifts) )
idx_to = tuple( slice(s,e+1) for s,e in zip(self.starts,self.ends) )
a[idx_to] = self._data[idx_from]
return a.flatten( order=order)
# ...
def _toarray_parallel_with_pads(self, order='C'):
pads = [m*p for m,p in zip(self.space.shifts, self.pads)]
# Step 0: create extended n-dimensional array with zero values
shape = tuple( n+2*p for n,p in zip( self.space.npts, pads ) )
a = np.zeros( shape, self.dtype )
# Step 1: write extended data chunk (local to process) onto array
idx = tuple( slice(s,e+2*p+1) for s,e,p in
zip( self.starts, self.ends, pads) )
a[idx] = self._data
# Step 2: if necessary, apply periodic boundary conditions to array
ndim = self.space.ndim
for direction in range( ndim ):
periodic = self.space.cart.periods[direction]
coord = self.space.cart.coords [direction]
nproc = self.space.cart.nprocs [direction]
if periodic:
p = pads[direction]
if p == 0:
continue
# Left-most process: copy data from left to right
if coord == 0:
idx_from = tuple(
(slice(None,p) if d == direction else slice(None))
for d in range( ndim )
)
idx_to = tuple(
(slice(-2*p,-p) if d == direction else slice(None))
for d in range( ndim )
)
a[idx_to] = a[idx_from]
# Right-most process: copy data from right to left
if coord == nproc-1:
idx_from = tuple(
(slice(-p,None) if d == direction else slice(None))
for d in range( ndim )
)
idx_to = tuple(
(slice(p,2*p) if d == direction else slice(None))
for d in range( ndim )
)
a[idx_to] = a[idx_from]
# Step 3: remove ghost regions from global array
idx = tuple( slice(p,-p) if p != 0 else slice(0, None) for p in pads )
out = a[idx]
# Step 4: return flattened array
return out.flatten( order=order)
#...
[docs]
def topetsc(self):
""" Convert to petsc data structure.
"""
from psydac.linalg.topetsc import vec_topetsc
vec = vec_topetsc( self )
return vec
# ...
def __getitem__(self, key):
index = self._getindex(key)
return self._data[index]
# ...
def __setitem__(self, key, value):
index = self._getindex(key)
self._data[index] = value
# ...
@property
def ghost_regions_in_sync(self):
return self._sync
# ...
# NOTE: this property must be set collectively
@ghost_regions_in_sync.setter
def ghost_regions_in_sync(self, value):
assert isinstance(value, bool)
self._sync = value
# ...
# TODO: maybe change name to 'exchange'
[docs]
def update_ghost_regions(self):
"""
Update ghost regions before performing non-local access to vector
elements (e.g. in matrix-vector product).
Parameters
----------
direction : int
Single direction along which to operate (if not specified, all of them).
"""
# Update interior ghost regions
if self.space.parallel:
if not self.space.cart.is_comm_null:
# PARALLEL CASE: fill in ghost regions with data from neighbors
self.space._synchronizer.start_update_ghost_regions(self._data, self._requests)
self.space._synchronizer. end_update_ghost_regions(self._data, self._requests)
else:
# SERIAL CASE: fill in ghost regions along periodic directions, otherwise set to zero
self._update_ghost_regions_serial()
# Update interface ghost regions
if self.space.parallel:
for axis, ext in self.space.interfaces:
V = self.space.interfaces[axis, ext]
if isinstance(V.cart, InterfaceCartDecomposition):
continue
slices = [slice(s, e+2*m*p+1) for s,e,m,p in zip(V.starts, V.ends, V.shifts, V.pads)]
self._interface_data[axis, ext][...] = self._data[tuple(slices)]
else:
for axis, ext in self.space.interfaces:
V = self.space.interfaces[axis, ext]
slices = [slice(s, e+2*m*p+1) for s,e,m,p in zip(V.starts, V.ends, V.shifts, V.pads)]
self._interface_data[axis, ext][...] = self._data[tuple(slices)]
# Flag ghost regions as up-to-date
self._sync = True
# ...
def _update_ghost_regions_serial(self):
ndim = self._space.ndim
for direction in range(ndim):
periodic = self._space.periods[direction]
p = self._space.pads [direction] * self._space.shifts[direction]
if p == 0:
continue
idx_front = [slice(None)] * direction
idx_back = [slice(None)] * (ndim-direction-1)
if periodic:
# Copy data from left to right
idx_from = tuple(idx_front + [slice( p, 2*p)] + idx_back)
idx_to = tuple(idx_front + [slice(-p,None)] + idx_back)
self._data[idx_to] = self._data[idx_from]
# Copy data from right to left
idx_from = tuple(idx_front + [slice(-2*p,-p)] + idx_back)
idx_to = tuple(idx_front + [slice(None, p)] + idx_back)
self._data[idx_to] = self._data[idx_from]
else:
# Set left ghost region to zero
idx_ghost = tuple(idx_front + [slice(None, p)] + idx_back)
self._data[idx_ghost] = 0
# Set right ghost region to zero
idx_ghost = tuple(idx_front + [slice(-p,None)] + idx_back)
self._data[idx_ghost] = 0
# ...
[docs]
def exchange_assembly_data(self):
"""
Exchange assembly data.
"""
if self.space.parallel and not self.space.cart.is_comm_null:
# PARALLEL CASE: fill in ghost regions with data from neighbors
self.space._synchronizer.start_exchange_assembly_data(self._data)
self.space._synchronizer. end_exchange_assembly_data(self._data)
else:
# SERIAL CASE: fill in ghost regions along periodic directions, otherwise set to zero
self._exchange_assembly_data_serial()
ndim = self._space.ndim
for direction in range(ndim):
idx_front = [slice(None)] * direction
idx_back = [slice(None)] * (ndim-direction-1)
p = self._space.pads [direction]
m = self._space.shifts[direction]
if p == 0:
continue
idx_from = tuple(idx_front + [slice(-m*p,None) if (-m*p+p)!=0 else slice(-m*p,None)] + idx_back)
self._data[idx_from] = 0.
idx_from = tuple(idx_front + [slice(0,m*p)] + idx_back)
self._data[idx_from] = 0.
# ...
def _exchange_assembly_data_serial(self):
ndim = self._space.ndim
for direction in range(ndim):
periodic = self._space.periods[direction]
p = self._space.pads [direction]
m = self._space.shifts [direction]
if p == 0:
continue
if periodic:
idx_front = [slice(None)] * direction
idx_back = [slice(None)] * (ndim-direction-1)
# Copy data from left to right
idx_to = tuple(idx_front + [slice( m*p, m*p+p)] + idx_back)
idx_from = tuple(idx_front + [slice(-m*p,-m*p+p) if (-m*p+p)!=0 else slice(-m*p,None)] + idx_back)
self._data[idx_to] += self._data[idx_from]
#--------------------------------------
# Private methods
#--------------------------------------
def _getindex(self, key):
# TODO: check if we should ignore padding elements
if not isinstance(key, tuple):
key = (key,)
index = []
for (i,s,p,m) in zip(key, self.starts, self.pads,self.space.shifts):
if isinstance(i, slice):
start = None if i.start is None else i.start - s + m*p
stop = None if i.stop is None else i.stop - s + m*p
l = slice(start, stop, i.step)
else:
l = i - s + m*p
index.append(l)
return tuple(index)
#===============================================================================
class StencilMatrix(LinearOperator):
"""
Matrix in n-dimensional stencil format.
This is a linear operator that maps elements of stencil vector space V to
elements of stencil vector space W.
For now we only accept V==W.
Parameters
----------
V : psydac.linalg.stencil.StencilVectorSpace
Domain of the new linear operator.
W : psydac.linalg.stencil.StencilVectorSpace
Codomain of the new linear operator.
pads:
backend:
precompiled : bool
Whether to use precompiled kernels for .dot() and .transpose()
"""
def __init__( self, V, W, pads=None , backend=None, precompiled=True):
assert isinstance(V, StencilVectorSpace)
assert isinstance(W, StencilVectorSpace)
assert W.pads == V.pads
if not W.dtype==V.dtype:
raise NotImplementedError("The domain and the codomain should have the same data type.")
if pads is not None:
for p,vp in zip(pads, V.pads):
assert p<=vp
self._pads = pads or tuple(V.pads)
dims = list(W.shape)
diags = [compute_diag_len(p, md, mc) for p,md,mc in zip(self._pads, V.shifts, W.shifts)]
self._data = np.zeros(dims+diags, dtype=W.dtype)
self._domain = V
self._codomain = W
self._ndim = len(dims)
self._backend = backend
self._precompiled = precompiled
self._is_T = False
self._diag_indices = None
self._requests = None
# Parallel attributes
if W.parallel:
if W.cart.is_comm_null:return
# Create data exchanger for ghost regions
self._synchronizer = get_data_exchanger(
cart = W.cart,
dtype = W.dtype,
coeff_shape = diags,
assembly = True
)
# Flag ghost regions as not up-to-date (conservative choice)
self._sync = False
# Prepare the arguments for the dot product method
nd = [(ej-sj+2*gp*mj-mj*p-gp)//mj*mi+1 for sj,ej,mj,mi,p,gp in zip(V.starts, V.ends, V.shifts, W.shifts, self._pads, V.pads)]
nc = [ei-si+1 for si,ei,mj,p in zip(W.starts, W.ends, V.shifts, self._pads)]
# Number of rows in matrix (along each dimension)
nrows = [min(ni, nj) for ni,nj in zip(nc, nd)]
nrows_extra = [max(0, ni-nj) for ni,nj in zip(nc, nd)]
args = {}
args['starts'] = tuple(V.starts)
args['nrows'] = tuple(nrows)
args['nrows_extra'] = tuple(nrows_extra)
args['gpads'] = tuple(V.pads)
args['pads'] = tuple(self._pads)
args['dm'] = tuple(V.shifts)
args['cm'] = tuple(W.shifts)
ndiags, _ = list(zip(*[compute_diag_len(p,mj,mi, return_padding=True) for p,mi,mj in zip(self._pads, W.shifts, V.shifts)]))
args['pad_imp'] = [gp*m+gp+1-n-s%m+p-gp for gp,m,n,s,p in zip(V.pads, V.shifts, ndiags, V.starts, self._pads)]
args['ndiags'] = ndiags
self._dotargs_null = args
self._dot = kernels['matvec'][self._ndim]
self._transpose_args = self._prepare_transpose_args()
self._transpose_func = kernels['transpose'][self._ndim]
if backend is None:
backend = PSYDAC_BACKENDS.get(os.environ.get('PSYDAC_BACKEND')) or PSYDAC_BACKENDS['python']
self.set_backend(backend, precompiled)
#--------------------------------------
# Abstract interface
#--------------------------------------
@property
def domain(self):
return self._domain
# ...
@property
def codomain(self):
return self._codomain
# ...
@property
def dtype(self):
return self._domain.dtype
# ...
def dot(self, v, out=None):
"""
Return the matrix/vector product between self and v.
This function optimized this product.
Parameters
----------
v : StencilVector
Vector of the domain of self needed for the Matrix/Vector product.
out : StencilVector
Vector of the codomain of self.
Returns
-------
out : StencilVector
Vector of the codomain of self, contain the result of the product.
"""
assert isinstance(v, StencilVector)
assert v.space is self.domain
if out is not None:
assert isinstance( out, StencilVector )
assert out.space is self.codomain
else:
out = StencilVector( self.codomain )
# Necessary if vector space is distributed across processes
if not v.ghost_regions_in_sync:
v.update_ghost_regions()
self._func(self._data, v._data, out._data, **self._args)
# IMPORTANT: flag that ghost regions are not up-to-date
out.ghost_regions_in_sync = False
return out
# ...
def vdot( self, v, out=None):
"""
Return the matrix/vector product between the conjugate of self and v.
This function optimized this product.
Parameters
----------
v : StencilVector
Vector of the domain of self needed for the Matrix/Vector product
out : StencilVector
Vector of the codomain of self
Returns
-------
out : StencilVector
Vector of the codomain of self, contain the result of the product
"""
assert isinstance(v, StencilVector)
assert v.space is self.domain
if out is not None:
assert isinstance(out, StencilVector)
assert out.space is self.codomain
else:
out = StencilVector( self.codomain )
# Necessary if vector space is distributed across processes
if not v.ghost_regions_in_sync:
v.update_ghost_regions()
# Instead of computing A_*x, this function computes (A*x_)_
self._func(self._data, np.conjugate(v._data), out._data, **self._args)
np.conjugate(out._data, out=out._data)
# IMPORTANT: flag that ghost regions are not up-to-date
out.ghost_regions_in_sync = False
return out
# ...
def transpose(self, conjugate=False, out=None):
""""
Return the transposed StencilMatrix, or the Hermitian Transpose if conjugate==True
Parameters
----------
conjugate : Bool(optional)
True to get the Hermitian adjoint.
out : StencilMatrix(optional)
Optional out for the transpose to avoid temporaries
"""
# For clarity rename self
M = self
# If necessary, update ghost regions of original matrix M
if not M.ghost_regions_in_sync:
M.update_ghost_regions()
# Create new matrix where domain and codomain are swapped
if out is not None :
assert isinstance(out, StencilMatrix)
assert out.codomain == M.domain
assert out.domain == M.codomain
else :
out = StencilMatrix(M.codomain, M.domain, pads=self._pads, backend=self._backend, precompiled=self._precompiled)
# Call low-level '_transpose' function (works on Numpy arrays directly)
if conjugate:
self._transpose_func(np.conjugate(M._data), out._data, **self._transpose_args)
else:
self._transpose_func(M._data, out._data, **self._transpose_args)
return out
# ...
def toarray(self, **kwargs):
""" Convert to Numpy 2D array. """
order = kwargs.pop('order', 'C')
with_pads = kwargs.pop('with_pads', False)
if self.codomain.parallel and with_pads:
coo = self._tocoo_parallel_with_pads(order=order)
else:
coo = self._tocoo_no_pads(order=order)
return coo.toarray()
# ...
def tosparse(self, **kwargs):
""" Convert to any Scipy sparse matrix format. """
order = kwargs.pop('order', 'C')
with_pads = kwargs.pop('with_pads', False)
if self.codomain.parallel and with_pads:
coo = self._tocoo_parallel_with_pads(order=order)
else:
coo = self._tocoo_no_pads(order=order)
return coo
#--------------------------------------
# Overridden properties/methods
#--------------------------------------
def __neg__(self):
return self.__mul__(-1)
# ...
def __mul__(self, a):
w = StencilMatrix(self._domain, self._codomain, self._pads, self._backend, precompiled=self._precompiled)
w._data = self._data * a
w._func = self._func
w._args = self._args
w._sync = self._sync
return w
#...
def __add__(self, m):
if isinstance(m, StencilMatrix):
#assert isinstance(m, StencilMatrix)
assert m._domain is self._domain
assert m._codomain is self._codomain
assert m._pads == self._pads
if m._backend is not self._backend:
msg = 'Adding two matrices with different backends is ambiguous - defaulting to backend of first addend'
warnings.warn(msg, category=RuntimeWarning)
w = StencilMatrix(self._domain, self._codomain, self._pads, self._backend, precompiled=self._precompiled)
w._data = self._data + m._data
w._func = self._func
w._args = self._args
w._sync = self._sync and m._sync
return w
else:
return LinearOperator.__add__(self, m)
#...
def __sub__(self, m):
if isinstance(m, StencilMatrix):
#assert isinstance(m, StencilMatrix)
assert m._domain is self._domain
assert m._codomain is self._codomain
assert m._pads == self._pads
if m._backend is not self._backend:
msg = 'Subtracting two matrices with different backends is ambiguous - defaulting to backend of the matrix we subtract from'
warnings.warn(msg, category=RuntimeWarning)
w = StencilMatrix(self._domain, self._codomain, self._pads, backend=self._backend, precompiled=self._precompiled)
w._data = self._data - m._data
w._func = self._func
w._args = self._args
w._sync = self._sync and m._sync
return w
else:
return LinearOperator.__sub__(self, m)
#--------------------------------------
# New properties/methods
#--------------------------------------
# TODO: check if this method is really needed!!
def conjugate(self, out=None):
if out is not None:
assert isinstance(out, StencilMatrix)
assert out.domain is self.domain
assert out.codomain is self.codomain
else:
out = StencilMatrix(self.domain, self.codomain, pads=self.pads, backend=self._backend, precompiled=self._precompiled)
out._func = self._func
out._args = self._args
np.conjugate(self._data, out=out._data, casting='no')
return out
# ...
# TODO: check if this method is really needed!!
def conj(self, out=None):
return self.conjugate(out=out)
# ...
@property
def pads(self):
return self._pads
# ...
@property
def backend(self):
return self._backend
# ...
def __getitem__(self, key):
index = self._getindex( key )
return self._data[index]
# ...
def __setitem__(self, key, value):
index = self._getindex( key )
self._data[index] = value
#...
def max(self):
return self._data.max()
#...
def copy(self, out = None):
"""
Create a copy of self, that can potentially be stored in a given StencilMatrix.
Parameters
----------
out : StencilMatrix(optional)
The existing StencilMatrix in which we want to copy self.
"""
if out is not None :
assert isinstance(out, StencilMatrix)
assert out.domain == self.domain
assert out.codomain == self.codomain
else :
out = StencilMatrix( self.domain, self.codomain, self._pads, backend=self._backend, precompiled=self._precompiled )
out._data[:] = self._data[:]
out._func = self._func
out._args = self._args
return out
#...
def __imul__(self, a):
self._data *= a
return self
#...
def __iadd__(self, m):
if isinstance(m, StencilMatrix):
#assert isinstance(m, StencilMatrix)
assert m._domain is self._domain
assert m._codomain is self._codomain
assert m._pads == self._pads
self._data += m._data
self._sync = m._sync and self._sync
return self
else:
return LinearOperator.__add__(self, m)
#...
def __isub__(self, m):
if isinstance(m, StencilMatrix):
#assert isinstance(m, StencilMatrix)
assert m._domain is self._domain
assert m._codomain is self._codomain
assert m._pads == self._pads
self._data -= m._data
self._sync = m._sync and self._sync
return self
else:
return LinearOperator.__sub__(self, m)
#...
def __abs__(self):
w = StencilMatrix( self._domain, self._codomain, self._pads, backend=self._backend, precompiled=self._precompiled )
w._data = abs(self._data)
w._func = self._func
w._args = self._args
w._sync = self._sync
return w
#...
def remove_spurious_entries(self):
"""
If any dimension is NOT periodic, make sure that the corresponding
periodic corners are set to zero.
"""
# TODO: access 'self._data' directly for increased efficiency
ndim = self._domain.ndim
for direction in range(ndim):
periodic = self._domain.periods[direction]
if not periodic:
nc = self._codomain.npts[direction]
nd = self._domain.npts[direction]
s = self._codomain.starts[direction]
e = self._codomain.ends [direction]
p = self.pads [direction]
idx_front = [slice(None)]*direction
idx_back = [slice(None)]*(ndim-direction-1)
# Top-right corner
for i in range( max(0,s), min(p,e+1) ):
index = tuple( idx_front + [i] + idx_back +
idx_front + [slice(-p,-i)] + idx_back )
self[index] = 0
# Bottom-left corner
for i in range( max(nd-p,s), min(nc,e+1) ):
index = tuple( idx_front + [i] + idx_back +
idx_front + [slice(nd-i,p+1)] + idx_back )
self[index] = 0
# ...
def update_ghost_regions(self):
"""
Update ghost regions before performing non-local access to matrix
elements (e.g. in matrix transposition).
"""
ndim = self._codomain.ndim
parallel = self._codomain.parallel
if parallel:
if not self._codomain.cart.is_comm_null:
# PARALLEL CASE: fill in ghost regions with data from neighbors
self._synchronizer.start_update_ghost_regions( self._data, self._requests )
self._synchronizer.end_update_ghost_regions( self._data , self._requests)
else:
# SERIAL CASE: fill in ghost regions along periodic directions, otherwise set to zero
self._update_ghost_regions_serial()
# Flag ghost regions as up-to-date
self._sync = True
# ...
def exchange_assembly_data(self):
"""
Exchange assembly data.
"""
ndim = self._codomain.ndim
parallel = self._codomain.parallel
if self._codomain.parallel:
# PARALLEL CASE: fill in ghost regions with data from neighbors
self._synchronizer.start_exchange_assembly_data( self._data )
self._synchronizer.end_exchange_assembly_data( self._data )
else:
# SERIAL CASE: fill in ghost regions along periodic directions, otherwise set to zero
self._exchange_assembly_data_serial()
ndim = self._codomain.ndim
for direction in range(ndim):
idx_front = [slice(None)]*direction
idx_back = [slice(None)]*(ndim-direction-1)
p = self._codomain.pads [direction]
m = self._codomain.shifts[direction]
if p == 0:
continue
idx_from = tuple( idx_front + [ slice(-m*p,None) if (-m*p+p)!=0 else slice(-m*p,None)] + idx_back )
self._data[idx_from] = 0.
idx_from = tuple( idx_front + [ slice(0,m*p)] + idx_back )
self._data[idx_from] = 0.
# ...
def _exchange_assembly_data_serial(self):
ndim = self._codomain.ndim
for direction in range(ndim):
periodic = self._codomain.periods[direction]
p = self._codomain.pads [direction]
m = self._codomain.shifts[direction]
if p == 0:
continue
if periodic:
idx_front = [slice(None)]*direction
idx_back = [slice(None)]*(ndim-direction-1)
# Copy data from left to right
idx_to = tuple( idx_front + [slice( m*p, m*p+p)] + idx_back )
idx_from = tuple( idx_front + [slice(-m*p,-m*p+p) if (-m*p+p)!=0 else slice(-m*p,None)] + idx_back )
self._data[idx_to] += self._data[idx_from]
# ...
def diagonal(self, *, inverse = False, sqrt = False, out = None):
"""
Get the coefficients on the main diagonal as a StencilDiagonalMatrix object.
Parameters
----------
inverse : bool
If True, get the inverse of the diagonal. (Default: False).
Can be combined with sqrt to get the inverse square root.
sqrt : bool
If True, get the square root of the diagonal. (Default: False).
Can be combined with inverse to get the inverse square root.
out : StencilDiagonalMatrix
If provided, write the diagonal entries into this matrix. (Default: None).
Returns
-------
StencilDiagonalMatrix
The matrix which contains the main diagonal of self (or its inverse).
"""
# Check `inverse` argument
assert isinstance(inverse, bool)
# Determine domain and codomain of the StencilDiagonalMatrix
V, W = self.domain, self.codomain
if inverse:
V, W = W, V
# Check `out` argument
if out is not None:
assert isinstance(out, StencilDiagonalMatrix)
assert out.domain is V
assert out.codomain is W
# Extract diagonal data from self and identify output array
diagonal_indices = self._get_diagonal_indices()
diag = self._data[diagonal_indices]
data = out._data if out else None
# Calculate entries of StencilDiagonalMatrix
if inverse:
data = np.divide(1, diag, out=data)
elif out:
np.copyto(data, diag)
else:
data = diag.copy()
if sqrt:
np.sqrt(data, out=data)
# If needed create a new StencilDiagonalMatrix object
if out is None:
out = StencilDiagonalMatrix(V, W, data)
return out
# ...
def topetsc(self):
""" Convert to PETSc data structure.
"""
from psydac.linalg.topetsc import mat_topetsc
mat = mat_topetsc(self)
return mat
#--------------------------------------
# Private methods
#--------------------------------------
def _getindex(self, key):
nd = self._ndim
ii = key[:nd]
kk = key[nd:]
index = []
for i,s,p,m in zip( ii, self._codomain.starts, self._codomain.pads, self._codomain.shifts ):
x = self._shift_index( i, m*p-s )
index.append( x )
for k,p in zip( kk, self._pads ):
l = self._shift_index( k, p )
index.append( l )
return tuple(index)
# ...
@staticmethod
def _shift_index(index, shift):
if isinstance( index, slice ):
start = None if index.start is None else index.start + shift
stop = None if index.stop is None else index.stop + shift
return slice(start, stop, index.step)
else:
return index + shift
def tocoo_local(self, order='C'):
# Shortcuts
sc = self._codomain.starts
ec = self._codomain.ends
pc = self._codomain.pads
sd = self._domain.starts
ed = self._domain.ends
pd = self._domain.pads
nd = self._ndim
nr = [e-s+1 +2*p for s,e,p in zip(sc, ec, pc)]
nc = [e-s+1 +2*p for s,e,p in zip(sd, ed, pd)]
ravel_multi_index = np.ravel_multi_index
# COO storage
rows = []
cols = []
data = []
local = tuple( [slice(p,-p) for p in pc] + [slice(None)] * nd )
dd = [pdi-ppi for pdi,ppi in zip(pd, self._pads)]
for (index, value) in np.ndenumerate( self._data[local] ):
# index = [i1-s1, i2-s2, ..., p1+j1-i1, p2+j2-i2, ...]
xx = index[:nd] # ii is local
ll = index[nd:] # l=p+k
ii = [x+p for x,p in zip(xx, pc)]
jj = [(l+i+d)%n for (i,l,d,n) in zip(xx,ll,dd,nc)]
I = ravel_multi_index( ii, dims=nr, order=order )
J = ravel_multi_index( jj, dims=nc, order=order )
rows.append( I )
cols.append( J )
data.append( value )
M = coo_matrix(
(data,(rows,cols)),
shape = [np.prod(nr),np.prod(nc)],
dtype = self._domain.dtype
)
M.eliminate_zeros()
return M
#...
def _tocoo_no_pads(self , order='C'):
# Shortcuts
nr = self._codomain.npts
nd = self._ndim
nc = self._domain.npts
ss = self._codomain.starts
cpads = self._codomain.pads
dm = self._domain.shifts
cm = self._codomain.shifts
pp = [np.int64(compute_diag_len(p,mj,mi)-(p+1)) for p,mi,mj in zip(self._pads, cm, dm)]
# Range of data owned by local process (no ghost regions)
local = tuple( [slice(mi*p,-mi*p) if p != 0 else slice(p, None) for p,mi in zip(cpads, cm)] + [slice(None)] * nd )
size = self._data[local].size
# COO storage
rows = np.zeros(size, dtype='int64')
cols = np.zeros(size, dtype='int64')
data = np.zeros(size, dtype=self.dtype)
nrl = [np.int64(e-s+1) for s,e in zip(self.codomain.starts, self.codomain.ends)]
ncl = [np.int64(i) for i in self._data.shape[nd:]]
ss = [np.int64(i) for i in ss]
nr = [np.int64(i) for i in nr]
nc = [np.int64(i) for i in nc]
dm = [np.int64(i) for i in dm]
cm = [np.int64(i) for i in cm]
cpads = [np.int64(i) for i in cpads]
pp = [np.int64(i) for i in pp]
stencil2coo = kernels['stencil2coo'][order][nd]
ind = stencil2coo(self._data, data, rows, cols, *nrl, *ncl, *ss, *nr, *nc, *dm, *cm, *cpads, *pp)
M = coo_matrix(
(data[:ind],(rows[:ind],cols[:ind])),
shape = [np.prod(nr),np.prod(nc)],
dtype = self.dtype)
return M
#...
def _tocoo_parallel_with_pads(self , order='C'):
# If necessary, update ghost regions
if not self.ghost_regions_in_sync:
self.update_ghost_regions()
# Shortcuts
nr = self._codomain.npts
nc = self._domain.npts
nd = self._ndim
ss = self._codomain.starts
ee = self._codomain.ends
pp = self._pads
pc = self._codomain.pads
pd = self._domain.pads
cc = self._codomain.periods
ravel_multi_index = np.ravel_multi_index
# COO storage
rows = []
cols = []
data = []
# List of rows (to avoid duplicate updates)
I_list = []
# Shape of row and diagonal spaces
xx_dims = self._data.shape[:nd]
ll_dims = self._data.shape[nd:]
# Cycle over rows (x = p + i - s)
for xx in np.ndindex( *xx_dims ):
# Compute row multi-index with simple shift
ii = [s + x - p for (s, x, p) in zip(ss, xx, pc)]
# Apply periodicity where appropriate
ii = [i - n if (c and i >= n and i - n < s) else
i + n if (c and i < 0 and i + n > e) else i
for (i, s, e, n, c) in zip(ii, ss, ee, nr, cc)]
# Compute row flat index
# Exclude values outside global limits of matrix
try:
I = ravel_multi_index( ii, dims=nr, order=order )
except ValueError:
continue
# If I is a new row, append it to list of rows
# DO NOT update same row twice!
if I not in I_list:
I_list.append( I )
else:
continue
# Cycle over diagonals (l = p + k)
for ll in np.ndindex( *ll_dims ):
# Compute column multi-index (k = j - i)
jj = [(i+l-p) % n for (i,l,n,p) in zip(ii,ll,nc,pp)]
# Compute column flat index
J = ravel_multi_index( jj, dims=nc, order=order )
# Extract matrix value
value = self._data[(*xx, *ll)]
# Append information to COO arrays
rows.append( I )
cols.append( J )
data.append( value )
# Create Scipy COO matrix
M = coo_matrix(
(data,(rows,cols)),
shape = [np.prod(nr), np.prod(nc)],
dtype = self._domain.dtype
)
M.eliminate_zeros()
return M
# ...
@property
def ghost_regions_in_sync(self):
return self._sync
# ...
# NOTE: this property must be set collectively
@ghost_regions_in_sync.setter
def ghost_regions_in_sync(self, value):
assert isinstance(value, bool)
self._sync = value
# ...
def _update_ghost_regions_serial(self):
ndim = self._codomain.ndim
for direction in range(self._codomain.ndim):
periodic = self._codomain.periods[direction]
p = self._codomain.pads [direction]
if p == 0:
continue
idx_front = [slice(None)]*direction
idx_back = [slice(None)]*(ndim-direction-1 + ndim)
if periodic:
# Copy data from left to right
idx_from = tuple(idx_front + [slice( p, 2*p)] + idx_back)
idx_to = tuple(idx_front + [slice(-p,None)] + idx_back)
self._data[idx_to] = self._data[idx_from]
# Copy data from right to left
idx_from = tuple(idx_front + [slice(-2*p,-p)] + idx_back)
idx_to = tuple(idx_front + [slice(None, p)] + idx_back)
self._data[idx_to] = self._data[idx_from]
else:
# Set left ghost region to zero
idx_ghost = tuple(idx_front + [slice(None, p)] + idx_back)
self._data[idx_ghost] = 0
# Set right ghost region to zero
idx_ghost = tuple(idx_front + [slice(-p,None)] + idx_back)
self._data[idx_ghost] = 0
# ...
def _prepare_transpose_args(self):
#prepare the arguments for the transpose method
V = self.domain
W = self.codomain
ssc = W.starts
eec = W.ends
ssd = V.starts
eed = V.ends
pads = self._pads
gpads = V.pads
dm = V.shifts
cm = W.shifts
# Number of rows in the transposed matrix (along each dimension)
nrows = [e-s+1 for s, e in zip(ssd, eed)]
ncols = [e-s+2*m*p+1 for s, e, m, p in zip(ssc, eec, cm, gpads)]
pp = pads
ndiags, starts = list(zip(*[compute_diag_len(p, mi, mj, return_padding=True) for p, mi, mj in zip(pp, cm, dm)]))
ndiagsT, _ = list(zip(*[compute_diag_len(p, mj, mi, return_padding=True) for p, mi, mj in zip(pp, cm, dm)]))
diff = [gp-p for gp, p in zip(gpads, pp)]
sl = [(s if mi > mj else 0) + (s % mi + mi//mj if mi < mj else 0)+(s if mi == mj else 0)\
for s, p, mi, mj in zip(starts, pp, cm, dm)]
si = [(mi * p - mi * (int(np.ceil((p + 1)/mj)) - 1) if mi > mj else 0) + \
(mi * p - mi * (p//mi) + d * (mi - 1) if mi < mj else 0) + \
(mj * p - mj * (p//mi) + d * (mi - 1) if mi == mj else 0)\
for mi, mj, p, d in zip(cm, dm, pp, diff)]
sk = [n-1\
+ (-(p % mj) if mi > mj else 0)\
+ (-p + mj * (p//mi) if mi < mj else 0)\
+ (-p + mj * (p//mi) if mi == mj else 0)\
for mi, mj, n, p in zip(cm, dm, ndiagsT, pp)]
args={}
args['n'] = np.int64(nrows)
args['nc'] = np.int64(ncols)
args['gp'] = np.int64(gpads)
args['p'] = np.int64(pp)
args['dm'] = np.int64(dm)
args['cm'] = np.int64(cm)
args['nd'] = np.int64(ndiags)
args['ndT'] = np.int64(ndiagsT)
args['si'] = np.int64(si)
args['sk'] = np.int64(sk)
args['sl'] = np.int64(sl)
return args
# ...
def set_backend(self, backend, precompiled):
'''
Define which kernels are called when using .dot() and .transpose()
Parameters
----------
backend : str
Psydac backend option.
precompiled : bool
Whether to use precompiled kernels.
'''
self._backend = backend
self._args = self._dotargs_null.copy()
if self._backend is None:
for key, arg in self._args.items():
self._args[key] = np.int64(arg)
self._func = self._dot
self._args.pop('pads')
elif precompiled:
# print('Using precompiled matvec and transpose kernels ...')
from psydac.linalg import stencil_dot_kernels
from psydac.linalg import stencil_transpose_kernels
# matvec kernel
dot_func_name = 'matvec_' + str(self._ndim) + 'd_kernel'
self._func = getattr(stencil_dot_kernels, dot_func_name)
# parameter for rectangular matrices
add = [int(end_in >= end_out) for end_in, end_out in zip(self.domain.ends, self.codomain.ends)]
self._args = {}
if self._ndim == 1:
self._args['s_in'] = int(self.domain.starts[0])
self._args['p_in'] = int(self.domain.pads[0])
self._args['add'] = int(add[0])
self._args['s_out'] = int(self.codomain.starts[0])
self._args['e_out'] = int(self.codomain.ends[0])
self._args['p_out'] = int(self.codomain.pads[0])
else:
self._args['s_in'] = np.array(self.domain.starts)
self._args['p_in'] = np.array(self.domain.pads)
self._args['add'] = np.array(add)
self._args['s_out'] = np.array(self.codomain.starts)
self._args['e_out'] = np.array(self.codomain.ends)
self._args['p_out'] = np.array(self.codomain.pads)
# transpose kernel
transp_func_name = 'transpose_' + str(self._ndim) + 'd_kernel'
self._transpose_func = getattr(stencil_transpose_kernels, transp_func_name)
# parameter for rectangular matrices
add = [int(end_out >= end_in) for end_in, end_out in zip(self.domain.ends, self.codomain.ends)]
self._transpose_args = {}
if self._ndim == 1:
self._transpose_args['s_in'] = int(self.codomain.starts[0])
self._transpose_args['p_in'] = int(self.codomain.pads[0])
self._transpose_args['add'] = int(add[0])
self._transpose_args['s_out'] = int(self.domain.starts[0])
self._transpose_args['e_out'] = int(self.domain.ends[0])
self._transpose_args['p_out'] = int(self.domain.pads[0])
else:
self._transpose_args['s_in'] = np.array(self.codomain.starts)
self._transpose_args['p_in'] = np.array(self.codomain.pads)
self._transpose_args['add'] = np.array(add)
self._transpose_args['s_out'] = np.array(self.domain.starts)
self._transpose_args['e_out'] = np.array(self.domain.ends)
self._transpose_args['p_out'] = np.array(self.domain.pads)
else:
raise AttributeError(f'This is the tiny-psydac version - must use precompiled kernels (but {precompiled = })!')
from psydac.api.ast.linalg import LinearOperatorDot
if self.domain.parallel:
comm = self.codomain.cart.comm
if self.domain == self.codomain:
# In this case nrows_extra[i] == 0 for all i
dot = LinearOperatorDot(self._ndim,
block_shape = (1,1),
keys = ((0,0),),
comm = comm,
backend=frozenset(backend.items()),
nrows_extra = (self._args['nrows_extra'],),
gpads=(self._args['gpads'],),
pads=(self._args['pads'],),
dm = (self._args['dm'],),
cm = (self._args['cm'],),
dtype=self.dtype)
starts = self._args.pop('starts')
nrows = self._args.pop('nrows')
self._args.pop('nrows_extra')
self._args.pop('gpads')
self._args.pop('pads')
self._args.pop('dm')
self._args.pop('cm')
for i in range(len(nrows)):
self._args['s00_{i}'.format(i=i+1)] = np.int64(starts[i])
for i in range(len(nrows)):
self._args['n00_{i}'.format(i=i+1)] = np.int64(nrows[i])
else:
dot = LinearOperatorDot(self._ndim,
block_shape = (1,1),
keys = ((0,0),),
comm = comm,
backend=frozenset(backend.items()),
gpads=(self._args['gpads'],),
pads=(self._args['pads'],),
dm = (self._args['dm'],),
cm = (self._args['cm'],),
dtype=self.dtype)
starts = self._args.pop('starts')
nrows = self._args.pop('nrows')
nrows_extra = self._args.pop('nrows_extra')
self._args.pop('gpads')
self._args.pop('pads')
self._args.pop('dm')
self._args.pop('cm')
for i in range(len(nrows)):
self._args['s00_{i}'.format(i=i+1)] = np.int64(starts[i])
for i in range(len(nrows)):
self._args['n00_{i}'.format(i=i+1)] = np.int64(nrows[i])
for i in range(len(nrows)):
self._args['ne00_{i}'.format(i=i+1)] = np.int64(nrows_extra[i])
else:
dot = LinearOperatorDot(self._ndim,
block_shape = (1,1),
keys = ((0,0),),
comm = None,
backend=frozenset(backend.items()),
starts = (tuple(self._args['starts']),),
nrows=(tuple(self._args['nrows']),),
nrows_extra=(self._args['nrows_extra'],),
gpads=(self._args['gpads'],),
pads=(self._args['pads'],),
dm = (self._args['dm'],),
cm = (self._args['cm'],),
dtype=self.dtype)
self._args.pop('nrows')
self._args.pop('nrows_extra')
self._args.pop('gpads')
self._args.pop('pads')
self._args.pop('starts')
self._args.pop('dm')
self._args.pop('cm')
self._args.pop('pad_imp')
self._args.pop('ndiags')
self._func = dot.func
# ...
def _get_diagonal_indices(self):
"""
Compute the indices which should be applied to self._data in order to
get the matrix entries on the main diagonal. The result is also stored
in self._diag_indices, and retrieved from there on successive calls.
Returns
-------
tuple[numpy.ndarray, ndim]
The diagonal indices as a tuple of NumPy arrays of identical shape
(n1, n2, n3, ...).
"""
if self._diag_indices is None:
dp = self.domain.pads
dm = self.domain.shifts
cm = self.codomain.shifts
ss = self.codomain.starts
pp = [compute_diag_len(p, mj, mi) - p - 1 for p, mi, mj in zip(self._pads, cm, dm)]
nrows = [e - s + 1 for s, e in zip(self.codomain.starts, self.codomain.ends)]
ndim = self.domain.ndim
indices = [np.zeros(np.prod(nrows), dtype=int) for _ in range(2 * ndim)]
for l, xx in enumerate(np.ndindex(*nrows)):
ii = [m * p + x for m, p, x in zip(dm, dp, xx)]
jj = [p + x + s - ((x+s) // mi) * mj for x, mi, mj, p, s in zip(xx, cm, dm, pp, ss)]
for k in range(ndim):
indices[k][l] = ii[k]
indices[k + ndim][l] = jj[k]
self._diag_indices = tuple(idx.reshape(nrows) for idx in indices)
return self._diag_indices
#===============================================================================
class StencilDiagonalMatrix(LinearOperator):
"""
Linear operator which operates between stencil vector spaces, and which can
be represented by a matrix with non-zero entries only on its main diagonal.
As such this operator is completely local and requires no data communication.
We assume that the vectors in the domain and the codomain have the same
shape and are distributed in the same way.
Parameters
----------
V : psydac.linalg.stencil.StencilVectorSpace
Domain of the new linear operator.
W : psydac.linalg.stencil.StencilVectorSpace
Codomain of the new linear operator.
"""
def __init__(self, V, W, data):
# Check domain and codomain
assert isinstance(V, StencilVectorSpace)
assert isinstance(W, StencilVectorSpace)
assert V.starts == W.starts
assert V.ends == W.ends
data = np.asarray(data)
# Check shape of provided data
shape = tuple(e - s + 1 for s, e in zip(V.starts, V.ends))
assert data.shape == shape
# Store info in object
self._domain = V
self._codomain = W
self._data = data
#--------------------------------------
# Abstract interface
#--------------------------------------
@property
def domain(self):
return self._domain
@property
def codomain(self):
return self._codomain
@property
def dtype(self):
return self._data.dtype
def tosparse(self):
return sp_diags(self._data.ravel())
def toarray(self):
return self._data.copy()
def dot(self, v, out=None):
assert isinstance(v, StencilVector)
assert v.space is self.domain
if out is not None:
assert isinstance(out, StencilVector)
assert out.space is self.codomain
else:
out = self.codomain.zeros()
V = self.domain
i = tuple(slice(s, e + 1) for s, e in zip(V.starts, V.ends))
np.multiply(self._data, v[i], out=out[i])
out.ghost_regions_in_sync = False
return out
# ...
# TODO [YG 22.01.2024]: idot function will require a dedicated kernel
# ...
def transpose(self, *, conjugate=False, out=None):
assert isinstance(conjugate, bool)
if out is not None:
assert isinstance(out, StencilDiagonalMatrix)
assert out.domain is self.codomain
assert out.codomain is self.domain
if not (conjugate and self.dtype is complex):
if out is None:
data = self._data.copy()
else:
np.copyto(out._data, self._data, casting='no')
else:
if out is None:
data = np.conjugate(self._data, casting='no')
else:
np.conjugate(self._data, out=out._data, casting='no')
if out is None:
out = StencilDiagonalMatrix(self.codomain, self.domain, data)
return out
#--------------------------------------
# Other properties/methods
#--------------------------------------
def copy(self, *, out=None):
if out is self:
return self
if out is None:
data = self._data.copy()
out = StencilDiagonalMatrix(self.domain, self.codomain, data)
else:
assert isinstance(out, StencilDiagonalMatrix)
assert out.domain is self.domain
assert out.codomain is self.codomain
np.copyto(out._data, self._data, casting='no')
return out
def diagonal(self, *, inverse = False, out = None):
"""
Get the coefficients on the main diagonal as a StencilDiagonalMatrix object.
In the default case (inverse=False, out=None) self is returned.
Parameters
----------
inverse : bool
If True, get the inverse of the diagonal. (Default: False).
out : StencilDiagonalMatrix
If provided, write the diagonal entries into this matrix. (Default: None).
Returns
-------
StencilDiagonalMatrix
Either self, or another StencilDiagonalMatrix with the diagonal inverse.
"""
# Check `inverse` argument
assert isinstance(inverse, bool)
# Determine domain and codomain of the `out` matrix
V, W = self.domain, self.codomain
if inverse:
V, W = W, V
# Check `out` argument and identify `data` array of output vector
if out is None:
data = None
else:
assert isinstance(out, StencilDiagonalMatrix)
assert out.domain is V
assert out.codomain is W
data = out._data
# Calculate entries, or set `out=self` in default case
if inverse:
data = np.divide(1, diag, out=data)
elif out:
np.copyto(data, diag)
else:
out = self
# If needed create a new StencilDiagonalMatrix object
if out is None:
out = StencilDiagonalMatrix(V, W, data)
return out
#===============================================================================
# TODO [YG, 28.01.2021]:
# - Check if StencilMatrix should be subclassed
# - Reimplement magic methods (some are simply copied from StencilMatrix)
def flip_axis(index, n):
s = n - index.start-1
e = n - index.stop-1 if n > index.stop else None
return slice(s,e,-1)
class StencilInterfaceMatrix(LinearOperator):
"""
Matrix in n-dimensional stencil format for an interface.
This is a linear operator that maps elements of stencil vector space V to
elements of stencil vector space W.
Parameters
----------
V : psydac.linalg.stencil.StencilVectorSpace
Domain of the new linear operator.
W : psydac.linalg.stencil.StencilVectorSpace
Codomain of the new linear operator.
s_d : int
The starting index of the domain.
s_c : int
The starting index of the codomain.
d_axis : int
The axis of the Interface of the domain.
c_axis : int
The axis of the Interface of the codomain.
d_ext : int
The extremity of the domain Interface space.
the values must be 1 or -1.
c_ext : int
The extremity of the codomain Interface space.
the values must be 1 or -1.
dim : int
The axis of the interface.
pads: <list|tuple>
Padding of the linear operator.
"""
def __init__(self, V, W, s_d, s_c, d_axis, c_axis, d_ext, c_ext, *, flip=None, pads=None, backend=None):
assert isinstance(V, StencilVectorSpace)
assert isinstance(W, StencilVectorSpace)
assert W.pads == V.pads
Vin = V.interfaces[d_axis, d_ext]
if pads is not None:
for p,vp in zip(pads, Vin.pads):
assert p<=vp
self._pads = pads or tuple(Vin.pads)
dims = list(W.shape)
if W.parent_ends[c_axis] is not None:
diff = min(1, W.parent_ends[c_axis]-W.ends[c_axis])
else:
diff = 0
dims[c_axis] = W.pads[c_axis] + 1-diff + 2*W.shifts[c_axis]*W.pads[c_axis]
diags = [compute_diag_len(p, md, mc) for p,md,mc in zip(self._pads, Vin.shifts, W.shifts)]
self._data = np.zeros(dims + diags, dtype=W.dtype)
# Parallel attributes
if W.parallel and not isinstance(W.cart, InterfaceCartDecomposition):
if W.cart.is_comm_null:return
# Create data exchanger for ghost regions
self._synchronizer = get_data_exchanger(
cart = W.cart,
dtype = W.dtype,
coeff_shape = diags,
assembly = True,
axis = c_axis,
shape = self._data.shape
)
self._flip = tuple([1]*len(dims) if flip is None else flip)
self._permutation = list(range(len(dims)))
self._permutation[d_axis], self._permutation[c_axis] = self._permutation[c_axis], self._permutation[d_axis]
self._domain = V
self._codomain = W
self._domain_axis = d_axis
self._codomain_axis = c_axis
self._domain_ext = d_ext
self._codomain_ext = c_ext
self._domain_start = s_d
self._codomain_start = s_c
self._ndim = len(dims)
self._backend = None
# Prepare the arguments for the dot product method
nd = [(ej-sj+2*gp*mj-mj*p-gp)//mj*mi+1 for sj,ej,mj,mi,p,gp in zip(Vin.starts, Vin.ends, Vin.shifts, W.shifts, self._pads, Vin.pads)]
nc = [ei-si+1 for si,ei,mj,p in zip(W.starts, W.ends, Vin.shifts, self._pads)]
# Number of rows in matrix (along each dimension)
nrows = [min(ni,nj) for ni,nj in zip(nc, nd)]
nrows_extra = [max(0,ni-nj) for ni,nj in zip(nc, nd)]
nrows_extra[c_axis] = max(W.npts[c_axis]-Vin.npts[c_axis], 0)
nrows[c_axis] = W.pads[c_axis] + 1-diff-nrows_extra[c_axis]
args = {}
args['starts'] = tuple(Vin.starts)
args['nrows'] = tuple(nrows)
args['nrows_extra'] = tuple(nrows_extra)
args['gpads'] = tuple(Vin.pads)
args['pads'] = tuple(self._pads)
args['dm'] = tuple(Vin.shifts)
args['cm'] = tuple(W.shifts)
args['c_axis'] = c_axis
args['d_start'] = self._domain_start
args['c_start'] = self._codomain_start
args['flip'] = self._flip
args['permutation'] = self._permutation
self._dotargs_null = args
self._args = args.copy()
self._func = self._dot
self._transpose_args = self._prepare_transpose_args()
self._transpose_func = kernels['interface_transpose'][self._ndim]
if backend is None:
backend = PSYDAC_BACKENDS.get(os.environ.get('PSYDAC_BACKEND'))
if backend:
self.set_backend(backend)
# Flag ghost regions as not up-to-date (conservative choice)
self._sync = False
#--------------------------------------
# Abstract interface
#--------------------------------------
@property
def domain(self):
return self._domain
# ...
@property
def codomain(self):
return self._codomain
# ...
@property
def dtype(self):
return self.domain.dtype
# ...
def dot(self, v, out=None):
assert isinstance(v, StencilVector)
assert v.space is self.domain
# Necessary if vector space is distributed across processes
if out is not None:
assert isinstance(out, StencilVector)
assert out.space is self.codomain
out[(slice(None,None),)*v.space.ndim] = 0.
else:
out = StencilVector( self.codomain )
# Necessary if vector space is distributed across processes
if not v.ghost_regions_in_sync and not v.space.parallel:
v.update_ghost_regions()
self._func(self._data, v._interface_data[self._domain_axis, self._domain_ext], out._data, **self._args)
# IMPORTANT: flag that ghost regions are not up-to-date
out.ghost_regions_in_sync = False
return out
# ...
@staticmethod
def _dot(mat, v, out, starts, nrows, nrows_extra, gpads, pads, dm, cm, c_axis, d_start, c_start, flip, permutation):
# Index for k=i-j
nrows = list(nrows)
ndim = len(v.shape)
kk = [slice(None)]*ndim
diff = [xp-p for xp,p in zip(gpads, pads)]
ndiags, _ = list(zip(*[compute_diag_len(p,mj,mi, return_padding=True) for p,mi,mj in zip(pads,cm,dm)]))
bb = [p*m+p+1-n-s%m for p,m,n,s in zip(gpads, dm, ndiags, starts)]
nn = v.shape
for xx in np.ndindex( *nrows ):
ii = [ mi*pi + x for mi,pi,x in zip(cm, gpads, xx) ]
jj = tuple( slice(b-d+(x+s%mj)//mi*mj,b-d+(x+s%mj)//mi*mj+n) for x,mi,mj,b,s,n,d in zip(xx,cm,dm,bb,starts,ndiags,diff) )
jj = [flip_axis(i,n) if f==-1 else i for i,f,n in zip(jj,flip,nn)]
jj = tuple(jj[i] for i in permutation)
ii_kk = tuple( ii + kk )
ii[c_axis] += c_start
out[tuple(ii)] = np.dot( mat[ii_kk].flat, v[jj].flat )
new_nrows = nrows.copy()
for d,er in enumerate(nrows_extra):
rows = new_nrows.copy()
del rows[d]
for n in range(er):
for xx in np.ndindex(*rows):
xx = list(xx)
xx.insert(d, nrows[d]+n)
ii = [mi*pi + x for mi,pi,x in zip(cm, gpads, xx)]
ee = [max(x-l+1,0) for x,l in zip(xx, nrows)]
jj = tuple( slice(b-d+(x+s%mj)//mi*mj, b-d+(x+s%mj)//mi*mj+n-e) for x,mi,mj,d,e,b,s,n in zip(xx, cm, dm, diff, ee, bb, starts, ndiags) )
jj = [flip_axis(i,n) if f==-1 else i for i,f,n in zip(jj, flip, nn)]
jj = tuple(jj[i] for i in permutation)
kk = [slice(None,n-e) for n,e in zip(ndiags, ee)]
ii_kk = tuple( ii + kk )
ii[c_axis] += c_start
out[tuple(ii)] = np.dot( mat[ii_kk].flat, v[jj].flat )
new_nrows[d] += er
# ...
def transpose( self, conjugate=False, out=None):
""" Create new StencilInterfaceMatrix Mt, where domain and codomain are swapped
with respect to original matrix M, and Mt_{ij} = M_{ji}.
"""
# For clarity rename self
M = self
if out is None:
# Create new matrix where domain and codomain are swapped
out = StencilInterfaceMatrix(M.codomain, M.domain, M.codomain_start, M.domain_start, M.codomain_axis, M.domain_axis, M.codomain_ext, M.domain_ext,
flip=M.flip, pads=M.pads, backend=M.backend)
# Call low-level '_transpose' function (works on Numpy arrays directly)
if conjugate:
M._transpose_func(np.conjugate(M._data), out._data, **M._transpose_args)
else:
M._transpose_func(M._data, out._data, **M._transpose_args)
return out
def _prepare_transpose_args(self):
#prepare the arguments for the transpose method
V = self.domain
W = self.codomain
ssc = W.starts
eec = W.ends
ssd = V.interfaces[self._domain_axis, self._domain_ext].starts
eed = V.interfaces[self._domain_axis, self._domain_ext].ends
pads = self._pads
gpads = V.pads
dm = V.shifts
cm = W.shifts
dim = self._codomain_axis
# Number of rows in the transposed matrix (along each dimension)
nrows = [e-s+1 for s,e in zip(ssd, eed)]
ncols = [e-s+1+2*m*p for s, e, m, p in zip(ssc, eec, cm, gpads)]
pp = pads
ndiags, starts = list(zip(*[compute_diag_len(p,mi,mj, return_padding=True) for p, mi, mj in zip(pp, cm, dm)]))
ndiagsT, _ = list(zip(*[compute_diag_len(p,mj,mi, return_padding=True) for p, mi, mj in zip(pp, cm, dm)]))
diff = [gp-p for gp, p in zip(gpads, pp)]
sl = [(s if mi > mj else 0) + (s % mi + mi//mj if mi < mj else 0)+(s if mi == mj else 0)\
for s, p, mi, mj in zip(starts, pp, cm, dm)]
si = [(mi * p - mi * (int(np.ceil((p + 1)/mj)) - 1) if mi > mj else 0) + \
(mi * p - mi * (p//mi) + d * (mi - 1) if mi < mj else 0) + \
(mj * p - mj * (p//mi) + d * (mi - 1) if mi == mj else 0)\
for mi, mj, p, d in zip(cm, dm, pp, diff)]
sk = [n - 1\
+ (-(p % mj) if mi > mj else 0)\
+ (-p + mj * (p//mi) if mi < mj else 0)\
+ (-p + mj * (p//mi) if mi == mj else 0)\
for mi, mj, n, p in zip(cm, dm, ndiagsT, pp)]
if V.parent_ends[dim] is not None:
diff_r = min(1, V.parent_ends[dim] - V.ends[dim])
else:
diff_r = 0
if W.parent_ends[dim] is not None:
diff_c = min(1, W.parent_ends[dim] - W.ends[dim])
else:
diff_c = 0
nrows[dim] = pads[dim] + 1 - diff_r
ncols[dim] = pads[dim] + 1 - diff_c + 2*cm[dim]*pads[dim]
args = {}
args['n'] = np.int64(nrows)
args['nc'] = np.int64(ncols)
args['gp'] = np.int64(gpads)
args['p'] = np.int64(pp)
args['dm'] = np.int64(dm)
args['cm'] = np.int64(cm)
args['nd'] = np.int64(ndiags)
args['ndT'] = np.int64(ndiagsT)
args['si'] = np.int64(si)
args['sk'] = np.int64(sk)
args['sl'] = np.int64(sl)
return args
# ...
def toarray(self, **kwargs):
order = kwargs.pop('order', 'C')
with_pads = kwargs.pop('with_pads', False)
if self.codomain.parallel and with_pads:
coo = self._tocoo_parallel_with_pads()
else:
coo = self._tocoo_no_pads()
return coo.toarray()
# ...
def tosparse(self, **kwargs):
order = kwargs.pop('order', 'C')
with_pads = kwargs.pop('with_pads', False)
if self.codomain.parallel and with_pads:
coo = self._tocoo_parallel_with_pads()
else:
coo = self._tocoo_no_pads()
return coo
#...
def copy(self):
M = StencilInterfaceMatrix( self._domain, self._codomain,
self._domain_start, self._codomain_start,
self._domain_axis, self._codomain_axis,
self._domain_ext, self._codomain_ext,
flip=self._flip, pads=self._pads,
backend=self._backend )
M._data[:] = self._data[:]
return M
# ...
def __neg__(self):
return self.__mul__(-1)
#...
def __mul__(self, a):
w = self.copy()
w._data *= a
w._sync = self._sync
return w
#...
def __add__(self, m):
raise NotImplementedError('TODO: StencilInterfaceMatrix.__add__')
#...
def __sub__(self, m):
raise NotImplementedError('TODO: StencilInterfaceMatrix.__sub__')
#...
def __imul__(self, a):
self._data *= a
#...
def __iadd__(self, m):
raise NotImplementedError('TODO: StencilInterfaceMatrix.__iadd__')
#...
def __isub__(self, m):
raise NotImplementedError('TODO: StencilInterfaceMatrix.__isub__')
#--------------------------------------
# Other properties/methods
#--------------------------------------
# ...
@property
def domain_axis(self):
return self._domain_axis
# ...
@property
def codomain_axis(self):
return self._codomain_axis
# ...
@property
def domain_ext(self):
return self._domain_ext
# ...
@property
def codomain_ext(self):
return self._codomain_ext
# ...
@property
def domain_start(self):
return self._domain_start
# ...
@property
def codomain_start(self):
return self._codomain_start
# ...
@property
def dim(self):
return self._ndim
# ...
@property
def flip(self):
return self._flip
# ...
@property
def permutation(self):
return self._permutation
# ...
@property
def pads(self):
return self._pads
# ...
def __getitem__(self, key):
index = self._getindex( key )
return self._data[index]
# ...
def __setitem__(self, key, value):
index = self._getindex( key )
self._data[index] = value
#...
def max(self):
return self._data.max()
# ...
@property
def backend(self):
return self._backend
#--------------------------------------
# Private methods
#--------------------------------------
def _getindex(self, key):
nd = self._ndim
ii = key[:nd]
kk = key[nd:]
index = []
for i,s,p in zip(ii, self._codomain.starts, self._codomain.pads):
x = self._shift_index(i, p-s)
index.append(x)
for k,p in zip(kk, self._pads):
l = self._shift_index(k, p)
index.append(l)
return tuple(index)
# ...
@staticmethod
def _shift_index(index, shift):
if isinstance(index, slice):
start = None if index.start is None else index.start + shift
stop = None if index.stop is None else index.stop + shift
return slice(start, stop, index.step)
else:
return index + shift
#...
def _tocoo_no_pads(self):
# Shortcuts
nr = self.codomain.npts
nc = self.domain.npts
ss = self.codomain.starts
pp = self.codomain.pads
nd = len(pp)
dim = self._codomain_axis
flip = self.flip
permutation = self.permutation
c_start = self.codomain_start
d_start = self.domain_start
dm = self.domain.shifts
cm = self.codomain.shifts
ravel_multi_index = np.ravel_multi_index
# COO storage
rows = []
cols = []
data = []
# Range of data owned by local process (no ghost regions)
local = tuple( [slice(m*p,-m*p) if p != 0 else slice(0, None) for m,p in zip(cm, pp)] + [slice(None)] * nd )
pp = [compute_diag_len(p,mj,mi)-(p+1) for p,mi,mj in zip(self._pads, cm, dm)]
for (index,value) in np.ndenumerate( self._data[local] ):
if value:
# index = [i1, i2, ..., p1+j1-i1, p2+j2-i2, ...]
xx = index[:nd] # x=i-s
ll = index[nd:] # l=p+k
ii = [s+x for s,x in zip(ss,xx)]
di = [i//m for i,m in zip(ii,cm)]
jj = [(i*m+l-p)%n for (i,m,l,n,p) in zip(di,dm,ll,nc,pp)]
ii[dim] += c_start
jj[dim] += d_start
jj = [n-j-1 if f==-1 else j for j,f,n in zip(jj,flip,nc)]
jj = [jj[i] for i in permutation]
I = ravel_multi_index(ii, dims=nr, order='C')
J = ravel_multi_index(jj, dims=nc, order='C')
rows.append(I)
cols.append(J)
data.append(value)
M = coo_matrix(
(data,(rows,cols)),
shape = [np.prod(nr),np.prod(nc)],
dtype = self.domain.dtype)
return M
# ...
@property
def ghost_regions_in_sync(self):
return self._sync
# ...
# NOTE: this property must be set collectively
@ghost_regions_in_sync.setter
def ghost_regions_in_sync(self, value):
assert isinstance(value, bool)
self._sync = value
# ...
def _update_ghost_regions_serial(self, direction: int):
if direction is None:
for d in range(self._codomain.ndim):
self._update_ghost_regions_serial(d)
return
ndim = self._codomain.ndim
periodic = self._codomain.periods[direction]
p = self._codomain.pads [direction]
if p == 0:
return
idx_front = [slice(None)] * direction
idx_back = [slice(None)] * (ndim-direction-1)
if periodic:
# Copy data from left to right
idx_from = tuple(idx_front + [slice( p, 2*p)] + idx_back)
idx_to = tuple(idx_front + [slice(-p,None)] + idx_back)
self._data[idx_to] = self._data[idx_from]
# Copy data from right to left
idx_from = tuple(idx_front + [slice(-2*p,-p)] + idx_back)
idx_to = tuple(idx_front + [slice(None, p)] + idx_back)
self._data[idx_to] = self._data[idx_from]
else:
# Set left ghost region to zero
idx_ghost = tuple(idx_front + [slice(None, p)] + idx_back)
self._data[idx_ghost] = 0
# Set right ghost region to zero
idx_ghost = tuple(idx_front + [slice(-p,None)] + idx_back)
self._data[idx_ghost] = 0
# ...
def exchange_assembly_data(self):
"""
Exchange assembly data.
"""
ndim = self._codomain.ndim
parallel = self._codomain.parallel
if self._codomain.parallel:
# PARALLEL CASE: fill in ghost regions with data from neighbors
self._synchronizer.start_exchange_assembly_data(self._data)
self._synchronizer. end_exchange_assembly_data(self._data)
else:
# SERIAL CASE: fill in ghost regions along periodic directions, otherwise set to zero
self._exchange_assembly_data_serial()
# ...
def _exchange_assembly_data_serial(self):
ndim = self._codomain.ndim
for direction in range(ndim):
if direction == self._codomain_axis:
continue
periodic = self._codomain.periods[direction]
p = self._codomain.pads [direction]
m = self._codomain.shifts [direction]
if periodic:
idx_front = [slice(None)] * direction
idx_back = [slice(None)] * (ndim-direction-1)
# Copy data from left to right
idx_to = tuple(idx_front + [slice( m*p, m*p+p)] + idx_back)
idx_from = tuple(idx_front + [slice(-m*p,-m*p+p) if (-m*p+p)!=0 else slice(-m*p, None)] + idx_back)
self._data[idx_to] += self._data[idx_from]
# ...
def set_backend(self, backend, precompiled=False):
raise AttributeError(f'This is the tiny-psydac version - must use precompiled kernels (but {precompiled = })!')
from psydac.api.ast.linalg import LinearOperatorDot
self._backend = backend
self._args = self._dotargs_null.copy()
if self._backend is None:
self._func = self._dot
else:
if self.domain.parallel:
comm = self.domain.interfaces[self._domain_axis, self._domain_ext].cart.local_comm
if self.domain == self.codomain:
# In this case nrows_extra[i] == 0 for all i
dot = LinearOperatorDot(self._ndim,
block_shape = (1,1),
keys = ((0,0),),
comm = comm,
backend=frozenset(backend.items()),
nrows_extra=(self._args['nrows_extra'],),
gpads=(self._args['gpads'],),
pads=(self._args['pads'],),
dm = (self._args['dm'],),
cm = (self._args['cm'],),
interface=True,
flip_axis=self._flip,
interface_axis=self._codomain_axis,
d_start=(self._domain_start,),
c_start=(self._codomain_start,),
dtype= self.dtype)
starts = self._args.pop('starts')
nrows = self._args.pop('nrows')
self._args = {}
for i in range(len(nrows)):
self._args['s00_{i}'.format(i=i+1)] = np.int64(starts[i])
for i in range(len(nrows)):
self._args['n00_{i}'.format(i=i+1)] = np.int64(nrows[i])
else:
dot = LinearOperatorDot(self._ndim,
block_shape = (1,1),
keys = ((0,0),),
comm = comm,
backend=frozenset(backend.items()),
gpads=(self._args['gpads'],),
pads=(self._args['pads'],),
dm = (self._args['dm'],),
cm = (self._args['cm'],),
interface=True,
flip_axis=self._flip,
interface_axis=self._codomain_axis,
d_start=(self._domain_start,),
c_start=(self._codomain_start,),
dtype= self.dtype)
starts = self._args.pop('starts')
nrows = self._args.pop('nrows')
nrows_extra = self._args.pop('nrows_extra')
self._args = {}
for i in range(len(nrows)):
self._args['s00_{i}'.format(i=i+1)] = np.int64(starts[i])
for i in range(len(nrows)):
self._args['n00_{i}'.format(i=i+1)] = np.int64(nrows[i])
for i in range(len(nrows)):
self._args['ne00_{i}'.format(i=i+1)] = np.int64(nrows_extra[i])
else:
dot = LinearOperatorDot(self._ndim,
block_shape = (1,1),
keys = ((0,0),),
comm = None,
backend=frozenset(backend.items()),
starts = (tuple(self._args['starts']),),
nrows=(self._args['nrows'],),
nrows_extra=(self._args['nrows_extra'],),
gpads=(self._args['gpads'],),
pads=(self._args['pads'],),
dm = (self._args['dm'],),
cm = (self._args['cm'],),
interface=True,
flip_axis=self._flip,
interface_axis=self._codomain_axis,
d_start=(self._domain_start,),
c_start=(self._codomain_start,),
dtype= self.dtype)
self._args = {}
self._func = dot.func
#===============================================================================
del VectorSpace, Vector