From da77e25f285087664d85a2a92ee4dca88d63e44b Mon Sep 17 00:00:00 2001 From: Evangelos Katralis Date: Thu, 20 Nov 2025 10:48:23 +0100 Subject: [PATCH 01/28] Add CUDA Sparse Solvers --- xobjects/SparseSolvers/CUDA/cuDSSLU.py | 81 +++++++ xobjects/SparseSolvers/CUDA/luLU.py | 307 +++++++++++++++++++++++++ 2 files changed, 388 insertions(+) create mode 100644 xobjects/SparseSolvers/CUDA/cuDSSLU.py create mode 100644 xobjects/SparseSolvers/CUDA/luLU.py diff --git a/xobjects/SparseSolvers/CUDA/cuDSSLU.py b/xobjects/SparseSolvers/CUDA/cuDSSLU.py new file mode 100644 index 0000000..e15f2ae --- /dev/null +++ b/xobjects/SparseSolvers/CUDA/cuDSSLU.py @@ -0,0 +1,81 @@ +import cupy as cp +import cupyx.scipy.sparse as sp +import nvmath + +class DirectSolverSuperLU(nvmath.sparse.advanced.DirectSolver): + """cuDSS-based direct solver; reuse factors for many RHS (SuperLU-style).""" + + def __init__(self, A, *, n_batches: int = 0,assume_general=True, order_rhs='auto', **kwargs): + # 1) Validate A + if not isinstance(A, sp.csr_matrix): + raise TypeError("A must be cupyx.scipy.sparse.csr_matrix") + if A.shape[0] != A.shape[1]: + raise ValueError("A must be square") + if A.indices.dtype != cp.int32 or A.indptr.dtype != cp.int32: + A = sp.csr_matrix((A.data, A.indices.astype(cp.int32), A.indptr.astype(cp.int32)), + shape=A.shape) + # 2) Sample RHS to initialize parent + if n_batches == 0: + b_sample = cp.zeros(A.shape[0], dtype=A.dtype) + else: + b_sample = cp.zeros((A.shape[0],n_batches), dtype=A.dtype, order = 'F') + self.b_dtype = A.dtype + super().__init__(A, b_sample, **kwargs) + + # 3) Optional: configure plan + pc = self.plan_config + if assume_general: + # LU path (cuDSS will often infer this, but we can be explicit if available) + try: + pc.matrix_type = nvmath.sparse.advanced.DirectSolverMatrixType.GENERAL + except AttributeError: + pass # older wheels infer type; safe to ignore + # Expose a simple RHS layout policy + self._order_rhs = order_rhs # 'F'|'C'|'auto' + + # 4) Build & factorize once; warmup solve to build internal caches + super().plan() + self.fac_info = super().factorize() + # Warmup on zeros (uses parent solve()) + super().reset_operands(b=b_sample) + super().solve() + + def _prepare_rhs(self, b): + # Accept 1-D or 2-D; ensure dtype/device and layout + if b.dtype != self.b_dtype: + raise TypeError(f"RHS dtype {b.dtype} does not match matrix dtype {self.b_dtype}") + if b.ndim == 1: + return b # vector is fine + if self._order_rhs == 'auto': + # cuDSS prefers column-major for (n,k) + return cp.array(b, order='F', copy=False) + return cp.array(b, order=self._order_rhs, copy=False) + + def solve(self, b): + """Solve A x = b using cached factors. Accepts (n,) or (n,k) RHS.""" + b = self._prepare_rhs(b) + super().reset_operands(b=b) + return super().solve() + + # Optional helpers for SuperLU-like API + def __call__(self, b): # x = solver(b) + return self.solve(b) + + def refactorize_values(self, A_new): + """Refactorize when numerical values change but sparsity pattern is the same.""" + if not isinstance(A_new, sp.csr_matrix): + raise TypeError("A_new must be CSR") + if A_new.shape != self.operands.a_shape: + raise ValueError("Shape mismatch") + if A_new.indices.dtype != cp.int32 or A_new.indptr.dtype != cp.int32: + A_new = sp.csr_matrix((A_new.data, A_new.indices.astype(cp.int32), + A_new.indptr.astype(cp.int32)), shape=A_new.shape) + super().reset_operands(A=A_new) + self.fac_info = self.factorize() + + def close(self): + """Explicitly free resources when not using a context manager.""" + try: + self.__exit__(None, None, None) # DirectSolver is context-manageable + except Exception: + pass \ No newline at end of file diff --git a/xobjects/SparseSolvers/CUDA/luLU.py b/xobjects/SparseSolvers/CUDA/luLU.py new file mode 100644 index 0000000..6bd4c8a --- /dev/null +++ b/xobjects/SparseSolvers/CUDA/luLU.py @@ -0,0 +1,307 @@ +import cupy as _cupy +import numpy as _numpy +import cupyx.scipy.sparse +from cupyx.scipy.sparse.linalg import SuperLU +from cupy_backends.cuda.api import driver as _driver +from cupy_backends.cuda.api import runtime as _runtime +from cupy_backends.cuda.libs import cusparse as _cusparse +from cupy._core import _dtype +from cupy.cuda import device as _device +from cupy.cuda import stream as _stream +from cupyx.cusparse import _dtype_to_IndexType, SpMatDescriptor, DnMatDescriptor, check_availability +import scipy.sparse.linalg +try: + import scipy.sparse.linalg + scipy_available = True +except ImportError: + scipy_available = False +from line_profiler import profile + +class CachedAbSolver: + + def __init__(self, a, b, alpha=1.0, lower=True, unit_diag=False, transa=False): + if not check_availability('spsm'): + raise RuntimeError('spsm is not available.') + + # Canonicalise transa + if transa is False: + transa = 'N' + elif transa is True: + transa = 'T' + elif transa not in 'NTH': + raise ValueError(f'Unknown transa (actual: {transa})') + + # Check A's type and sparse format + if cupyx.scipy.sparse.isspmatrix_csr(a): + pass + elif cupyx.scipy.sparse.isspmatrix_csc(a): + if transa == 'N': + a = a.T + transa = 'T' + elif transa == 'T': + a = a.T + transa = 'N' + elif transa == 'H': + a = a.conj().T + transa = 'N' + lower = not lower + elif cupyx.scipy.sparse.isspmatrix_coo(a): + pass + else: + raise ValueError('a must be CSR, CSC or COO sparse matrix') + assert a.has_canonical_format + + # Check B's ndim + if b.ndim == 1: + is_b_vector = True + b = b.reshape(-1, 1) + elif b.ndim == 2: + is_b_vector = False + else: + raise ValueError('b.ndim must be 1 or 2') + self.is_b_vector = is_b_vector + + # Check shapes + if not (a.shape[0] == a.shape[1] == b.shape[0]): + raise ValueError('mismatched shape') + + # Check dtypes + dtype = a.dtype + if dtype.char not in 'fdFD': + raise TypeError('Invalid dtype (actual: {})'.format(dtype)) + if dtype != b.dtype: + raise TypeError('dtype mismatch') + self.dtype = dtype + + # Prepare fill mode + if lower is True: + fill_mode = _cusparse.CUSPARSE_FILL_MODE_LOWER + elif lower is False: + fill_mode = _cusparse.CUSPARSE_FILL_MODE_UPPER + else: + raise ValueError('Unknown lower (actual: {})'.format(lower)) + self.fill_mode = fill_mode + + # Prepare diag type + if unit_diag is False: + diag_type = _cusparse.CUSPARSE_DIAG_TYPE_NON_UNIT + elif unit_diag is True: + diag_type = _cusparse.CUSPARSE_DIAG_TYPE_UNIT + else: + raise ValueError('Unknown unit_diag (actual: {})'.format(unit_diag)) + self.diag_type = diag_type + self.transa = transa + # Prepare op_a + if transa == 'N': + op_a = _cusparse.CUSPARSE_OPERATION_NON_TRANSPOSE + elif transa == 'T': + op_a = _cusparse.CUSPARSE_OPERATION_TRANSPOSE + else: # transa == 'H' + if dtype.char in 'fd': + op_a = _cusparse.CUSPARSE_OPERATION_TRANSPOSE + else: + op_a = _cusparse.CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE + self.op_a = op_a + # Prepare op_b + self.op_b = self._get_opb(b) + + # Allocate space for matrix C. Note that it is known cusparseSpSM requires + # the output matrix zero initialized. + m, _ = a.shape + if self.op_b == _cusparse.CUSPARSE_OPERATION_NON_TRANSPOSE: + _, n = b.shape + else: + n, _ = b.shape + c_shape = m, n + self.c_shape = c_shape + + self._perform_analysis(a, b, alpha=alpha) + + def _perform_analysis(self, a, b, alpha=1.0): + """Solves a sparse triangular linear system op(a) * x = alpha * op(b). + + Args: + a (cupyx.scipy.sparse.csr_matrix or cupyx.scipy.sparse.coo_matrix): + Sparse matrix with dimension ``(M, M)``. + b (cupy.ndarray): Dense matrix with dimension ``(M, K)``. + alpha (float or complex): Coefficient. + lower (bool): + True: ``a`` is lower triangle matrix. + False: ``a`` is upper triangle matrix. + unit_diag (bool): + True: diagonal part of ``a`` has unit elements. + False: diagonal part of ``a`` has non-unit elements. + transa (bool or str): True, False, 'N', 'T' or 'H'. + 'N' or False: op(a) == ``a``. + 'T' or True: op(a) == ``a.T``. + 'H': op(a) == ``a.conj().T``. + """ + + c = _cupy.zeros(self.c_shape, dtype=a.dtype, order='f') + + # Prepare descriptors and other parameters + self.handle = _device.get_cusparse_handle() + self.mat_a = SpMatDescriptor.create(a) + mat_b = DnMatDescriptor.create(b) + mat_c = DnMatDescriptor.create(c) + self.spsm_descr = _cusparse.spSM_createDescr() + self.alpha = _numpy.array(alpha, dtype=c.dtype).ctypes + self.cuda_dtype = _dtype.to_cuda_dtype(c.dtype) + self.algo = _cusparse.CUSPARSE_SPSM_ALG_DEFAULT + + try: + # Specify Lower|Upper fill mode + self.mat_a.set_attribute(_cusparse.CUSPARSE_SPMAT_FILL_MODE, self.fill_mode) + + # Specify Unit|Non-Unit diagonal type + self.mat_a.set_attribute(_cusparse.CUSPARSE_SPMAT_DIAG_TYPE, self.diag_type) + + # Allocate the workspace needed by the succeeding phases + buff_size = _cusparse.spSM_bufferSize( + self.handle, self.op_a, self.op_b, self.alpha.data, self.mat_a.desc, mat_b.desc, + mat_c.desc, self.cuda_dtype, self.algo, self.spsm_descr) + self.buff = _cupy.empty(buff_size, dtype=_cupy.int8) + + # Perform the analysis phase + _cusparse.spSM_analysis( + self.handle, self.op_a, self.op_b, self.alpha.data, self.mat_a.desc, mat_b.desc, + mat_c.desc, self.cuda_dtype, self.algo, self.spsm_descr, self.buff.data.ptr) + except Exception as e: + raise RuntimeError('spSM_analysis failed.') from e + + def _get_opb(self, b): + # Prepare op_b + if b._f_contiguous: + op_b = _cusparse.CUSPARSE_OPERATION_NON_TRANSPOSE + elif b._c_contiguous: + if _cusparse.get_build_version() < 11701: # earlier than CUDA 11.6 + raise ValueError('b must be F-contiguous.') + b = b.T + op_b = _cusparse.CUSPARSE_OPERATION_TRANSPOSE + else: + raise ValueError('b must be F-contiguous or C-contiguous.') + return op_b + @profile + def solve(self, b): + assert b.dtype == self.dtype + assert self._get_opb(b) == self.op_b + + if b.ndim == 1: + is_b_vector = True + b = b.reshape(-1, 1) + elif b.ndim == 2: + is_b_vector = False + else: + raise ValueError('b.ndim must be 1 or 2') + self.is_b_vector = is_b_vector + + c = _cupy.zeros(self.c_shape, dtype=self.dtype, order='f') + mat_b = DnMatDescriptor.create(b) + mat_c = DnMatDescriptor.create(c) + try: + # Executes the solve phase + _cusparse.spSM_solve( + self.handle, self.op_a, self.op_b, self.alpha.data, self.mat_a.desc, mat_b.desc, + mat_c.desc, self.cuda_dtype, self.algo, self.spsm_descr, self.buff.data.ptr) + finally: + _cupy.cuda.get_current_stream().synchronize() + # Reshape back if B was a vector + if self.is_b_vector: + c = c.reshape(-1) + return c + + # def __del__(self): + # # Destroy matrix descriptor + # print("Deleting solver obj...") + # _cusparse.spSM_destroyDescr(self.spsm_descr) + +class luLU(SuperLU): + + def __init__(self, A, trans = 'N', permc_spec=None, n_batches: int = 0, diag_pivot_thresh=None, relax=None, + panel_size=None, options={}): + if not check_availability('spsm'): + raise RuntimeError('spsm is not available.') + if not scipy_available: + raise RuntimeError('scipy is not available') + if not cupyx.scipy.sparse.isspmatrix(A): + raise TypeError('A must be cupyx.scipy.sparse.spmatrix') + if A.shape[0] != A.shape[1]: + raise ValueError('A must be a square matrix (A.shape: {})' + .format(A.shape)) + if A.dtype.char not in 'fdFD': + raise TypeError('Invalid dtype (actual: {})'.format(A.dtype)) + + a = A.get().tocsc() + a_slu = scipy.sparse.linalg.splu( + a, permc_spec=permc_spec, diag_pivot_thresh=diag_pivot_thresh, + relax=relax, panel_size=panel_size, options=options) + super().__init__(a_slu) + self.b_dtype = self.L.dtype + self._init_solvers(trans=trans, n_batches = n_batches) + + def _init_solvers(self,trans = 'N', n_batches = 0): + if n_batches == 0: + b_sample = _cupy.zeros(self.shape[0], dtype=self.b_dtype) + else: + b_sample = _cupy.zeros((self.shape[0],n_batches), dtype=self.b_dtype, order = 'F') + self.trans = trans + self.Lsolver = CachedAbSolver(self.L, b_sample, lower=True, transa=self.trans) + self.Usolver = CachedAbSolver(self.U, b_sample, lower=False, transa=self.trans) + # self.Usolver = CachedAbSolver(self.U.T, b_sample, lower=True, transa="T") #Can improve performance at times + @profile + def solve(self, rhs, trans='N'): + """Solves linear system of equations with one or several right-hand sides. + + Args: + rhs (cupy.ndarray): Right-hand side(s) of equation with dimension + ``(M)`` or ``(M, K)``. + trans (str): 'N', 'T' or 'H'. + 'N': Solves ``A * x = rhs``. + 'T': Solves ``A.T * x = rhs``. + 'H': Solves ``A.conj().T * x = rhs``. + + Returns: + cupy.ndarray: + Solution vector(s) + """ # NOQA + from cupyx import cusparse + if trans != self.trans: + raise AssertionError("Solve function assumes cached configuration. Rebuild cache by calling _init_solvers with desired configuration.") + if not isinstance(rhs, _cupy.ndarray): + raise TypeError('ojb must be cupy.ndarray') + if rhs.ndim not in (1, 2): + raise ValueError('rhs.ndim must be 1 or 2 (actual: {})'. + format(rhs.ndim)) + if rhs.shape[0] != self.shape[0]: + raise ValueError('shape mismatch (self.shape: {}, rhs.shape: {})' + .format(self.shape, rhs.shape)) + if trans not in ('N', 'T', 'H'): + raise ValueError('trans must be \'N\', \'T\', or \'H\'') + + x = rhs.astype(self.L.dtype) + if trans == 'N': + if self.perm_r is not None: + if x.ndim == 2 and x._f_contiguous: + x = x.T[:, self._perm_r_rev].T # want to keep f-order + else: + x = x[self._perm_r_rev] + x = self.Lsolver.solve(x) + x = self.Usolver.solve(x) + if self.perm_c is not None: + x = x[self.perm_c] + else: + if self.perm_c is not None: + if x.ndim == 2 and x._f_contiguous: + x = x.T[:, self._perm_c_rev].T # want to keep f-order + else: + x = x[self._perm_c_rev] + x = self.Usolver.solve(x) + x = self.Lsolver.solve(x) + if self.perm_r is not None: + x = x[self.perm_r] + + if not x._f_contiguous: + # For compatibility with SciPy + x = x.copy(order='F') + return x + \ No newline at end of file From c8b0295cf815645280194bf76b7041cd0edfd150 Mon Sep 17 00:00:00 2001 From: Evangelos Katralis Date: Thu, 20 Nov 2025 12:28:38 +0100 Subject: [PATCH 02/28] Update Cached Sparse Solvers for clarity --- xobjects/SparseSolvers/CUDA/cuDSSLU.py | 13 ++++++++++++- xobjects/SparseSolvers/CUDA/luLU.py | 26 +++++++++++++++++++++----- 2 files changed, 33 insertions(+), 6 deletions(-) diff --git a/xobjects/SparseSolvers/CUDA/cuDSSLU.py b/xobjects/SparseSolvers/CUDA/cuDSSLU.py index e15f2ae..54a3504 100644 --- a/xobjects/SparseSolvers/CUDA/cuDSSLU.py +++ b/xobjects/SparseSolvers/CUDA/cuDSSLU.py @@ -5,7 +5,11 @@ class DirectSolverSuperLU(nvmath.sparse.advanced.DirectSolver): """cuDSS-based direct solver; reuse factors for many RHS (SuperLU-style).""" - def __init__(self, A, *, n_batches: int = 0,assume_general=True, order_rhs='auto', **kwargs): + def __init__(self, A, *, + n_batches: int = 0, + assume_general=True, + order_rhs='auto', + **kwargs): # 1) Validate A if not isinstance(A, sp.csr_matrix): raise TypeError("A must be cupyx.scipy.sparse.csr_matrix") @@ -20,6 +24,7 @@ def __init__(self, A, *, n_batches: int = 0,assume_general=True, order_rhs='auto else: b_sample = cp.zeros((A.shape[0],n_batches), dtype=A.dtype, order = 'F') self.b_dtype = A.dtype + self.b_shape = b_sample.shape super().__init__(A, b_sample, **kwargs) # 3) Optional: configure plan @@ -53,6 +58,12 @@ def _prepare_rhs(self, b): def solve(self, b): """Solve A x = b using cached factors. Accepts (n,) or (n,k) RHS.""" + assert b.shape == self.b_shape, ( + "Cached solver can only accept RHS with the same shape " + f"as the initialized value. {self.b_shape}. " + "The initialized RHS shape can be changed by initializing " + "using a different value for n_batches" + ) b = self._prepare_rhs(b) super().reset_operands(b=b) return super().solve() diff --git a/xobjects/SparseSolvers/CUDA/luLU.py b/xobjects/SparseSolvers/CUDA/luLU.py index 6bd4c8a..cbe5fb2 100644 --- a/xobjects/SparseSolvers/CUDA/luLU.py +++ b/xobjects/SparseSolvers/CUDA/luLU.py @@ -217,8 +217,14 @@ def solve(self, b): class luLU(SuperLU): - def __init__(self, A, trans = 'N', permc_spec=None, n_batches: int = 0, diag_pivot_thresh=None, relax=None, - panel_size=None, options={}): + def __init__(self, A, + trans = 'N', + permc_spec=None, + n_batches: int = 0, + diag_pivot_thresh=None, + relax=None, + panel_size=None, + options={}): if not check_availability('spsm'): raise RuntimeError('spsm is not available.') if not scipy_available: @@ -241,10 +247,13 @@ def __init__(self, A, trans = 'N', permc_spec=None, n_batches: int = 0, diag_piv def _init_solvers(self,trans = 'N', n_batches = 0): if n_batches == 0: - b_sample = _cupy.zeros(self.shape[0], dtype=self.b_dtype) + b_sample = _cupy.zeros(self.shape[0], + dtype=self.b_dtype) else: - b_sample = _cupy.zeros((self.shape[0],n_batches), dtype=self.b_dtype, order = 'F') + b_sample = _cupy.zeros((self.shape[0],n_batches), + dtype=self.b_dtype, order = 'F') self.trans = trans + self.b_shape = b_sample.shape self.Lsolver = CachedAbSolver(self.L, b_sample, lower=True, transa=self.trans) self.Usolver = CachedAbSolver(self.U, b_sample, lower=False, transa=self.trans) # self.Usolver = CachedAbSolver(self.U.T, b_sample, lower=True, transa="T") #Can improve performance at times @@ -266,7 +275,8 @@ def solve(self, rhs, trans='N'): """ # NOQA from cupyx import cusparse if trans != self.trans: - raise AssertionError("Solve function assumes cached configuration. Rebuild cache by calling _init_solvers with desired configuration.") + raise AssertionError("Solve function assumes cached configuration. " \ + "Rebuild cache by calling _init_solvers with desired configuration.") if not isinstance(rhs, _cupy.ndarray): raise TypeError('ojb must be cupy.ndarray') if rhs.ndim not in (1, 2): @@ -275,6 +285,12 @@ def solve(self, rhs, trans='N'): if rhs.shape[0] != self.shape[0]: raise ValueError('shape mismatch (self.shape: {}, rhs.shape: {})' .format(self.shape, rhs.shape)) + assert rhs.shape == self.b_shape, ( + "Cached solver can only accept RHS with the same shape " + f"as the initialized value. {self.b_shape}. " + "The initialized RHS shape can be changed by initializing " + "using a different value for n_batches" + ) if trans not in ('N', 'T', 'H'): raise ValueError('trans must be \'N\', \'T\', or \'H\'') From 2152ecd22df8d0e2f1c2c0608a0196fb5067e806 Mon Sep 17 00:00:00 2001 From: Evangelos Katralis Date: Thu, 20 Nov 2025 12:29:54 +0100 Subject: [PATCH 03/28] Add context-aware Sparse Solver functionality --- xobjects/SparseSolvers/abstract_solver.py | 11 +++ xobjects/__init__.py | 1 + xobjects/sparse.py | 99 +++++++++++++++++++++++ 3 files changed, 111 insertions(+) create mode 100644 xobjects/SparseSolvers/abstract_solver.py create mode 100644 xobjects/sparse.py diff --git a/xobjects/SparseSolvers/abstract_solver.py b/xobjects/SparseSolvers/abstract_solver.py new file mode 100644 index 0000000..8a6022c --- /dev/null +++ b/xobjects/SparseSolvers/abstract_solver.py @@ -0,0 +1,11 @@ +from abc import ABC, abstractmethod + +class SuperLUlikeSolver(ABC): + + @abstractmethod + def __init__(self, A): + pass + + @abstractmethod + def solve(self,b): + return x \ No newline at end of file diff --git a/xobjects/__init__.py b/xobjects/__init__.py index 0d9f709..8f81cec 100644 --- a/xobjects/__init__.py +++ b/xobjects/__init__.py @@ -19,6 +19,7 @@ from .string import String from .struct import Struct, Field from .ref import Ref, UnionRef +from .sparse import factorized_sparse_solver from .context_cpu import ContextCpu from .context_pyopencl import ContextPyopencl diff --git a/xobjects/sparse.py b/xobjects/sparse.py new file mode 100644 index 0000000..541578d --- /dev/null +++ b/xobjects/sparse.py @@ -0,0 +1,99 @@ +import scipy.sparse +from typing import Optional, Literal, Union +from .context import XContext +from .context_cpu import ContextCpu +from .context_cupy import ContextCupy +from .context_pyopencl import ContextPyopencl +from .SparseSolvers.abstract_solver import SuperLUlikeSolver +try: + import cupyx.scipy.sparse + from cupyx import cusparse +except: + pass + +def factorized_sparse_solver(A: Union[scipy.sparse.csr_matrix, + scipy.sparse.csc_matrix], + n_batches: int = 0, + force_solver: Optional[ + Literal["scipySLU", + "PyKLU", + "cuDSS", + "CachedSLU", + "cupySLU"] + ] = None, + solverKwargs: dict = None, + context: Union[ + Literal["cpu", + "cupy", + "pyopencl"], + XContext + ] = "cpu" + ) -> SuperLUlikeSolver: + if solverKwargs is None: + solverKwargs = {} + if context == "cpu" or isinstance(context, ContextCpu): + if 'permc_spec' not in solverKwargs: + solverKwargs = solverKwargs | {"permc_spec":"MMD_AT_PLUS_A"} + if force_solver is None or force_solver == "scipySLU": + if A.shape[0]*n_batches < 10**5: + import warnings + warnings.warn("For small matrices, using PyKLU " + "can provide improved performance") + solver = scipy.sparse.linalg.splu(A.tocsc(),**solverKwargs) + elif force_solver == "PyKLU": + import PyKLU.klu as PyKLU + solver = PyKLU.Klu(A.tocsc()) + else: + raise ValueError("Unrecognized CPU Sparse solver. Available options: " + "scipySLU, PyKLU") + + elif context == "cupy" or isinstance(context, ContextCupy): + + assert isinstance(A ,cupyx.scipy.sparse.csr_matrix), ( + "When using ContextCupy, input must be " + "cupyx.scipy.sparse.csr_matrix") + + if force_solver is not None and force_solver != "cuDSS": + if 'permc_spec' not in solverKwargs: + solverKwargs = solverKwargs | {"permc_spec":"MMD_AT_PLUS_A"} + if force_solver is None: + import warnings + try: + from .SparseSolvers.CUDA.cuDSSLU import DirectSolverSuperLU + solver = DirectSolverSuperLU(A, n_batches = n_batches, **solverKwargs) + except (ModuleNotFoundError, RuntimeError) as e: + warnings.warn("cuDSS not available. " + "Falling back to Cached-SuperLU (spsm) " + f"Encountered Error: {e}") + if 'permc_spec' not in solverKwargs: + solverKwargs = solverKwargs | {"permc_spec":"MMD_AT_PLUS_A"} + try: + if cusparse.check_availability('csrsm2'): + raise RuntimeError("csrsm2 is avaiable. " + "cupy SuperLU performs better " + "than Cached-SuperLU (spsm)") + from .SparseSolvers.CUDA.luLU import luLU + solver = luLU(A, n_batches = n_batches, **solverKwargs) + except RuntimeError as e: + warnings.warn("Cached-SuperLU (spsm) solver failed. " + "Falling back to cupy SuperLU. " + f"Error encountered: {e}") + solver = cupyx.scipy.sparse.linalg.splu(A, **solverKwargs) + elif force_solver == "cuDSS": + from .SparseSolvers.CUDA.cuDSSLU import DirectSolverSuperLU + solver = DirectSolverSuperLU(A, n_batches = n_batches, **solverKwargs) + elif force_solver == "CachedSLU": + from .SparseSolvers.CUDA.luLU import luLU + solver = luLU(A, n_batches = n_batches, **solverKwargs) + elif force_solver == "cupySLU": + solver = cupyx.scipy.sparse.linalg.splu(A, **solverKwargs) + else: + raise ValueError("Unrecognized CUDA Sparse solver. Available options: " + "cuDSS, CachedSLU, cupySLU") + elif context == "pyopencl" or isinstance(context, ContextPyopencl): + raise NotImplementedError("No sparse solver is currently available " + "for PyOpenCL context") + else: + raise ValueError("Invalid context. Available contexts are: " \ + "cpu, cupy, pyopencl") + return solver \ No newline at end of file From 2d54741e3917b229d9a33fbd6430e79aec2c34d6 Mon Sep 17 00:00:00 2001 From: Evangelos Katralis Date: Fri, 21 Nov 2025 10:23:38 +0100 Subject: [PATCH 04/28] Refactor sparse module --- xobjects/sparse/__init__.py | 1 + xobjects/{ => sparse}/sparse.py | 18 +++++++++--------- 2 files changed, 10 insertions(+), 9 deletions(-) create mode 100644 xobjects/sparse/__init__.py rename xobjects/{ => sparse}/sparse.py (90%) diff --git a/xobjects/sparse/__init__.py b/xobjects/sparse/__init__.py new file mode 100644 index 0000000..5f88bc6 --- /dev/null +++ b/xobjects/sparse/__init__.py @@ -0,0 +1 @@ +from .sparse import factorized_sparse_solver \ No newline at end of file diff --git a/xobjects/sparse.py b/xobjects/sparse/sparse.py similarity index 90% rename from xobjects/sparse.py rename to xobjects/sparse/sparse.py index 541578d..a7ba025 100644 --- a/xobjects/sparse.py +++ b/xobjects/sparse/sparse.py @@ -1,10 +1,10 @@ import scipy.sparse from typing import Optional, Literal, Union -from .context import XContext -from .context_cpu import ContextCpu -from .context_cupy import ContextCupy -from .context_pyopencl import ContextPyopencl -from .SparseSolvers.abstract_solver import SuperLUlikeSolver +from ..context import XContext +from ..context_cpu import ContextCpu +from ..context_cupy import ContextCupy +from ..context_pyopencl import ContextPyopencl +from ..SparseSolvers.abstract_solver import SuperLUlikeSolver try: import cupyx.scipy.sparse from cupyx import cusparse @@ -59,7 +59,7 @@ def factorized_sparse_solver(A: Union[scipy.sparse.csr_matrix, if force_solver is None: import warnings try: - from .SparseSolvers.CUDA.cuDSSLU import DirectSolverSuperLU + from ..SparseSolvers.CUDA.cuDSSLU import DirectSolverSuperLU solver = DirectSolverSuperLU(A, n_batches = n_batches, **solverKwargs) except (ModuleNotFoundError, RuntimeError) as e: warnings.warn("cuDSS not available. " @@ -72,7 +72,7 @@ def factorized_sparse_solver(A: Union[scipy.sparse.csr_matrix, raise RuntimeError("csrsm2 is avaiable. " "cupy SuperLU performs better " "than Cached-SuperLU (spsm)") - from .SparseSolvers.CUDA.luLU import luLU + from ..SparseSolvers.CUDA.luLU import luLU solver = luLU(A, n_batches = n_batches, **solverKwargs) except RuntimeError as e: warnings.warn("Cached-SuperLU (spsm) solver failed. " @@ -80,10 +80,10 @@ def factorized_sparse_solver(A: Union[scipy.sparse.csr_matrix, f"Error encountered: {e}") solver = cupyx.scipy.sparse.linalg.splu(A, **solverKwargs) elif force_solver == "cuDSS": - from .SparseSolvers.CUDA.cuDSSLU import DirectSolverSuperLU + from ..SparseSolvers.CUDA.cuDSSLU import DirectSolverSuperLU solver = DirectSolverSuperLU(A, n_batches = n_batches, **solverKwargs) elif force_solver == "CachedSLU": - from .SparseSolvers.CUDA.luLU import luLU + from ..SparseSolvers.CUDA.luLU import luLU solver = luLU(A, n_batches = n_batches, **solverKwargs) elif force_solver == "cupySLU": solver = cupyx.scipy.sparse.linalg.splu(A, **solverKwargs) From 0d0ef4f77a75865cb2efbe3b47b878c1e27a7a0f Mon Sep 17 00:00:00 2001 From: Evangelos Katralis Date: Fri, 21 Nov 2025 11:29:49 +0100 Subject: [PATCH 05/28] Package sparse solvers in a nice interface --- xobjects/__init__.py | 2 +- xobjects/sparse/__init__.py | 4 +++- xobjects/sparse/solvers/CPU/__init__.py | 2 ++ xobjects/sparse/solvers/CUDA/__init__.py | 13 +++++++++++++ .../cuDSSLU.py => sparse/solvers/CUDA/_cuDSSLU.py} | 0 .../CUDA/luLU.py => sparse/solvers/CUDA/_luLU.py} | 0 xobjects/sparse/solvers/__init__.py | 7 +++++++ .../solvers/_abstract_solver.py} | 0 xobjects/sparse/sparse.py | 10 +++++----- 9 files changed, 31 insertions(+), 7 deletions(-) create mode 100644 xobjects/sparse/solvers/CPU/__init__.py create mode 100644 xobjects/sparse/solvers/CUDA/__init__.py rename xobjects/{SparseSolvers/CUDA/cuDSSLU.py => sparse/solvers/CUDA/_cuDSSLU.py} (100%) rename xobjects/{SparseSolvers/CUDA/luLU.py => sparse/solvers/CUDA/_luLU.py} (100%) create mode 100644 xobjects/sparse/solvers/__init__.py rename xobjects/{SparseSolvers/abstract_solver.py => sparse/solvers/_abstract_solver.py} (100%) diff --git a/xobjects/__init__.py b/xobjects/__init__.py index 8f81cec..8c2f5a7 100644 --- a/xobjects/__init__.py +++ b/xobjects/__init__.py @@ -19,7 +19,7 @@ from .string import String from .struct import Struct, Field from .ref import Ref, UnionRef -from .sparse import factorized_sparse_solver +from . import sparse from .context_cpu import ContextCpu from .context_pyopencl import ContextPyopencl diff --git a/xobjects/sparse/__init__.py b/xobjects/sparse/__init__.py index 5f88bc6..b10286c 100644 --- a/xobjects/sparse/__init__.py +++ b/xobjects/sparse/__init__.py @@ -1 +1,3 @@ -from .sparse import factorized_sparse_solver \ No newline at end of file +from .sparse import factorized_sparse_solver +from . import solvers +__all__ = ["factorized_sparse_solver","solvers"] diff --git a/xobjects/sparse/solvers/CPU/__init__.py b/xobjects/sparse/solvers/CPU/__init__.py new file mode 100644 index 0000000..3c30baf --- /dev/null +++ b/xobjects/sparse/solvers/CPU/__init__.py @@ -0,0 +1,2 @@ +from scipy.sparse.linalg import splu as scipySuperLU +__all__ = ["scipySuperLU"] \ No newline at end of file diff --git a/xobjects/sparse/solvers/CUDA/__init__.py b/xobjects/sparse/solvers/CUDA/__init__.py new file mode 100644 index 0000000..20f7537 --- /dev/null +++ b/xobjects/sparse/solvers/CUDA/__init__.py @@ -0,0 +1,13 @@ +__all__ = [] +try: + from ._cuDSSLU import DirectSolverSuperLU as cuDSSSuperLU + __all__.append("cuDSSSuperLU") +except: + pass +try: + from ._luLU import luLU as CachedSuperLU + __all__.append("CachedSuperLU") + from cupyx.scipy.sparse.linalg import splu as CupySuperLU + __all__.append("CupySuperLU") +except: + pass \ No newline at end of file diff --git a/xobjects/SparseSolvers/CUDA/cuDSSLU.py b/xobjects/sparse/solvers/CUDA/_cuDSSLU.py similarity index 100% rename from xobjects/SparseSolvers/CUDA/cuDSSLU.py rename to xobjects/sparse/solvers/CUDA/_cuDSSLU.py diff --git a/xobjects/SparseSolvers/CUDA/luLU.py b/xobjects/sparse/solvers/CUDA/_luLU.py similarity index 100% rename from xobjects/SparseSolvers/CUDA/luLU.py rename to xobjects/sparse/solvers/CUDA/_luLU.py diff --git a/xobjects/sparse/solvers/__init__.py b/xobjects/sparse/solvers/__init__.py new file mode 100644 index 0000000..c2201f6 --- /dev/null +++ b/xobjects/sparse/solvers/__init__.py @@ -0,0 +1,7 @@ +from . import CPU +__all__ = ["CPU"] +try: + from . import CUDA + __all__.append("CUDA") +except: + pass diff --git a/xobjects/SparseSolvers/abstract_solver.py b/xobjects/sparse/solvers/_abstract_solver.py similarity index 100% rename from xobjects/SparseSolvers/abstract_solver.py rename to xobjects/sparse/solvers/_abstract_solver.py diff --git a/xobjects/sparse/sparse.py b/xobjects/sparse/sparse.py index a7ba025..fe3310b 100644 --- a/xobjects/sparse/sparse.py +++ b/xobjects/sparse/sparse.py @@ -4,7 +4,7 @@ from ..context_cpu import ContextCpu from ..context_cupy import ContextCupy from ..context_pyopencl import ContextPyopencl -from ..SparseSolvers.abstract_solver import SuperLUlikeSolver +from .solvers._abstract_solver import SuperLUlikeSolver try: import cupyx.scipy.sparse from cupyx import cusparse @@ -59,7 +59,7 @@ def factorized_sparse_solver(A: Union[scipy.sparse.csr_matrix, if force_solver is None: import warnings try: - from ..SparseSolvers.CUDA.cuDSSLU import DirectSolverSuperLU + from .solvers.CUDA._cuDSSLU import DirectSolverSuperLU solver = DirectSolverSuperLU(A, n_batches = n_batches, **solverKwargs) except (ModuleNotFoundError, RuntimeError) as e: warnings.warn("cuDSS not available. " @@ -72,7 +72,7 @@ def factorized_sparse_solver(A: Union[scipy.sparse.csr_matrix, raise RuntimeError("csrsm2 is avaiable. " "cupy SuperLU performs better " "than Cached-SuperLU (spsm)") - from ..SparseSolvers.CUDA.luLU import luLU + from .solvers.CUDA._luLU import luLU solver = luLU(A, n_batches = n_batches, **solverKwargs) except RuntimeError as e: warnings.warn("Cached-SuperLU (spsm) solver failed. " @@ -80,10 +80,10 @@ def factorized_sparse_solver(A: Union[scipy.sparse.csr_matrix, f"Error encountered: {e}") solver = cupyx.scipy.sparse.linalg.splu(A, **solverKwargs) elif force_solver == "cuDSS": - from ..SparseSolvers.CUDA.cuDSSLU import DirectSolverSuperLU + from .solvers.CUDA._cuDSSLU import DirectSolverSuperLU solver = DirectSolverSuperLU(A, n_batches = n_batches, **solverKwargs) elif force_solver == "CachedSLU": - from ..SparseSolvers.CUDA.luLU import luLU + from .solvers.CUDA._luLU import luLU solver = luLU(A, n_batches = n_batches, **solverKwargs) elif force_solver == "cupySLU": solver = cupyx.scipy.sparse.linalg.splu(A, **solverKwargs) From 92f825855e1842c1d99c3b826e93795f45ae9e04 Mon Sep 17 00:00:00 2001 From: Evangelos Katralis Date: Fri, 21 Nov 2025 11:49:54 +0100 Subject: [PATCH 06/28] Add exception for unavailable cupy in factorized_sparse_solver --- xobjects/sparse/sparse.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/xobjects/sparse/sparse.py b/xobjects/sparse/sparse.py index fe3310b..aef1801 100644 --- a/xobjects/sparse/sparse.py +++ b/xobjects/sparse/sparse.py @@ -8,7 +8,9 @@ try: import cupyx.scipy.sparse from cupyx import cusparse + _cupy_available = True except: + _cupy_available = False pass def factorized_sparse_solver(A: Union[scipy.sparse.csr_matrix, @@ -48,7 +50,9 @@ def factorized_sparse_solver(A: Union[scipy.sparse.csr_matrix, "scipySLU, PyKLU") elif context == "cupy" or isinstance(context, ContextCupy): - + if not _cupy_available: + raise ModuleNotFoundError("No cupy module found. " \ + "ContextCupy unavailable") assert isinstance(A ,cupyx.scipy.sparse.csr_matrix), ( "When using ContextCupy, input must be " "cupyx.scipy.sparse.csr_matrix") From 1b5d6c1f29b2409c338749465025b536f74faff3 Mon Sep 17 00:00:00 2001 From: Evangelos Katralis Date: Mon, 24 Nov 2025 11:28:21 +0100 Subject: [PATCH 07/28] Make cuDSS solver more robust --- xobjects/sparse/solvers/CUDA/_cuDSSLU.py | 32 ++++++++++++++---------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/xobjects/sparse/solvers/CUDA/_cuDSSLU.py b/xobjects/sparse/solvers/CUDA/_cuDSSLU.py index 54a3504..018ddc2 100644 --- a/xobjects/sparse/solvers/CUDA/_cuDSSLU.py +++ b/xobjects/sparse/solvers/CUDA/_cuDSSLU.py @@ -1,14 +1,15 @@ import cupy as cp import cupyx.scipy.sparse as sp import nvmath +from typing import Literal, Union +import warnings class DirectSolverSuperLU(nvmath.sparse.advanced.DirectSolver): """cuDSS-based direct solver; reuse factors for many RHS (SuperLU-style).""" - def __init__(self, A, *, + def __init__(self, A: sp.csr_matrix, *, n_batches: int = 0, - assume_general=True, - order_rhs='auto', + assume_general: bool = True, **kwargs): # 1) Validate A if not isinstance(A, sp.csr_matrix): @@ -16,13 +17,16 @@ def __init__(self, A, *, if A.shape[0] != A.shape[1]: raise ValueError("A must be square") if A.indices.dtype != cp.int32 or A.indptr.dtype != cp.int32: - A = sp.csr_matrix((A.data, A.indices.astype(cp.int32), A.indptr.astype(cp.int32)), + A = sp.csr_matrix((A.data, + A.indices.astype(cp.int32), + A.indptr.astype(cp.int32)), shape=A.shape) # 2) Sample RHS to initialize parent if n_batches == 0: b_sample = cp.zeros(A.shape[0], dtype=A.dtype) else: - b_sample = cp.zeros((A.shape[0],n_batches), dtype=A.dtype, order = 'F') + b_sample = cp.zeros((A.shape[0],n_batches), + dtype=A.dtype, order = 'F') self.b_dtype = A.dtype self.b_shape = b_sample.shape super().__init__(A, b_sample, **kwargs) @@ -35,8 +39,6 @@ def __init__(self, A, *, pc.matrix_type = nvmath.sparse.advanced.DirectSolverMatrixType.GENERAL except AttributeError: pass # older wheels infer type; safe to ignore - # Expose a simple RHS layout policy - self._order_rhs = order_rhs # 'F'|'C'|'auto' # 4) Build & factorize once; warmup solve to build internal caches super().plan() @@ -47,15 +49,19 @@ def __init__(self, A, *, def _prepare_rhs(self, b): # Accept 1-D or 2-D; ensure dtype/device and layout + if not isinstance(b, cp.ndarray): + raise TypeError('b must be cupy.ndarray') if b.dtype != self.b_dtype: - raise TypeError(f"RHS dtype {b.dtype} does not match matrix dtype {self.b_dtype}") + raise TypeError(f"RHS dtype {b.dtype} does not match " + f"matrix dtype {self.b_dtype}") if b.ndim == 1: return b # vector is fine - if self._order_rhs == 'auto': - # cuDSS prefers column-major for (n,k) - return cp.array(b, order='F', copy=False) - return cp.array(b, order=self._order_rhs, copy=False) - + if not b.flags.f_contiguous: + warnings.warn("cuDSS expects a column-major ('F' ordered) " \ + "matrix for the RHS. RHS was autmatically converted " + "by the solver.") + return cp.asfortranarray(b) + def solve(self, b): """Solve A x = b using cached factors. Accepts (n,) or (n,k) RHS.""" assert b.shape == self.b_shape, ( From 6a853de6f12ed93657f98750a09044dd6d24af16 Mon Sep 17 00:00:00 2001 From: Evangelos Katralis Date: Mon, 24 Nov 2025 11:29:05 +0100 Subject: [PATCH 08/28] Allow factorized_sparse_solver function to infer context from input --- xobjects/sparse/sparse.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/xobjects/sparse/sparse.py b/xobjects/sparse/sparse.py index aef1801..c4cf5ea 100644 --- a/xobjects/sparse/sparse.py +++ b/xobjects/sparse/sparse.py @@ -1,4 +1,5 @@ import scipy.sparse +from numpy import ndarray as nparray from typing import Optional, Literal, Union from ..context import XContext from ..context_cpu import ContextCpu @@ -6,6 +7,7 @@ from ..context_pyopencl import ContextPyopencl from .solvers._abstract_solver import SuperLUlikeSolver try: + from cupy import ndarray as cparray import cupyx.scipy.sparse from cupyx import cusparse _cupy_available = True @@ -29,11 +31,23 @@ def factorized_sparse_solver(A: Union[scipy.sparse.csr_matrix, "cupy", "pyopencl"], XContext - ] = "cpu" + ] = None ) -> SuperLUlikeSolver: if solverKwargs is None: solverKwargs = {} + if context is None: + if isinstance(A, scipy.sparse.spmatrix) or isinstance(A, nparray): + context = 'cpu' + elif (_cupy_available and + (isinstance(A, cupyx.scipy.sparse.spmatrix) + or isinstance(A, cparray))): + context = 'cupy' + else: + raise TypeError("Unsupported type for A") if context == "cpu" or isinstance(context, ContextCpu): + assert isinstance(A, scipy.sparse.spmatrix), ( + "When using CPU context A must be a scipy.sparse matrix" + ) if 'permc_spec' not in solverKwargs: solverKwargs = solverKwargs | {"permc_spec":"MMD_AT_PLUS_A"} if force_solver is None or force_solver == "scipySLU": From aa7bd9e7719c45fece6594f3c8dc63539f50f63a Mon Sep 17 00:00:00 2001 From: Evangelos Katralis Date: Mon, 24 Nov 2025 13:03:03 +0100 Subject: [PATCH 09/28] Clean up luLU solver --- xobjects/sparse/solvers/CUDA/_luLU.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/xobjects/sparse/solvers/CUDA/_luLU.py b/xobjects/sparse/solvers/CUDA/_luLU.py index cbe5fb2..4089ace 100644 --- a/xobjects/sparse/solvers/CUDA/_luLU.py +++ b/xobjects/sparse/solvers/CUDA/_luLU.py @@ -15,7 +15,6 @@ scipy_available = True except ImportError: scipy_available = False -from line_profiler import profile class CachedAbSolver: @@ -181,7 +180,7 @@ def _get_opb(self, b): else: raise ValueError('b must be F-contiguous or C-contiguous.') return op_b - @profile + def solve(self, b): assert b.dtype == self.dtype assert self._get_opb(b) == self.op_b @@ -257,7 +256,7 @@ def _init_solvers(self,trans = 'N', n_batches = 0): self.Lsolver = CachedAbSolver(self.L, b_sample, lower=True, transa=self.trans) self.Usolver = CachedAbSolver(self.U, b_sample, lower=False, transa=self.trans) # self.Usolver = CachedAbSolver(self.U.T, b_sample, lower=True, transa="T") #Can improve performance at times - @profile + def solve(self, rhs, trans='N'): """Solves linear system of equations with one or several right-hand sides. From 72e7b8348f1125a98b05528f03220878cbf08d18 Mon Sep 17 00:00:00 2001 From: Evangelos Katralis Date: Mon, 24 Nov 2025 15:08:36 +0100 Subject: [PATCH 10/28] Make fix_random_seed function not override returned value --- xobjects/test_helpers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xobjects/test_helpers.py b/xobjects/test_helpers.py index 00d244d..7db897c 100644 --- a/xobjects/test_helpers.py +++ b/xobjects/test_helpers.py @@ -98,7 +98,8 @@ def wrapper(*args, **kwargs): rng_state = np.random.get_state() try: np.random.seed(seed) - test_function(*args, **kwargs) + #Return value of function instead of None + return test_function(*args, **kwargs) finally: np.random.set_state(rng_state) From c5b3441e9901a688788face6092e4b742bea29f9 Mon Sep 17 00:00:00 2001 From: Evangelos Katralis Date: Tue, 25 Nov 2025 14:54:17 +0100 Subject: [PATCH 11/28] Add tests for sparse solvers --- tests/test_sparse_solvers.py | 118 +++++++++++++++++++++++++++++++++++ 1 file changed, 118 insertions(+) create mode 100644 tests/test_sparse_solvers.py diff --git a/tests/test_sparse_solvers.py b/tests/test_sparse_solvers.py new file mode 100644 index 0000000..40779ce --- /dev/null +++ b/tests/test_sparse_solvers.py @@ -0,0 +1,118 @@ +import numpy as np +import scipy.sparse as sp +import xobjects as xo +from xobjects.test_helpers import fix_random_seed +import pytest + +PARAMS = [ + ("scipySLU", xo.ContextCpu()), + ("PyKLU", xo.ContextCpu()), + ("cuDSS", xo.ContextCupy()), + ("CachedSLU", xo.ContextCupy()), + ("cupySLU", xo.ContextCupy()), +] + +REL_TOL = 1e-4 +ABS_TOL = 1e-8 +SPARSE_SYSTEM_SIZE = 5000 # (n,n) matrix +NUM_BATCHES = 10 + +def batch_vectors_as_matrix(vector_list): + return np.asfortranarray(np.moveaxis(np.array(vector_list),0,-1)) + +@fix_random_seed(1337) +def make_random_sparse_system(n, nbatches, density=0.01): + A = sp.random( + n, n, + density=density, + format="csc", + random_state=np.random, + data_rvs=np.random.standard_normal + ) + # Make it nonsingular & better conditioned: + # Add something on the diagonal so pivots aren't tiny/zero + A = A + sp.eye(n, format="csc") * 5.0 # tweak factor as you like + b_array = [] + if nbatches == 0: + b = np.random.standard_normal(n) + b_array.append(b) + else: + for i in range(nbatches): + b = np.cos(2*i/(nbatches-1)*np.pi) * np.random.standard_normal(n) + b_array.append(b) + b = batch_vectors_as_matrix(b_array) + solver = sp.linalg.splu(A) + x = solver.solve(b) + return (A, b, x, b_array) + +@fix_random_seed(1337) +def make_tridiagonal_system(n, nbatches): + main = 2.0 + np.abs(np.random.standard_normal(n)) + lower = np.random.standard_normal(n-1) + upper = np.random.standard_normal(n-1) + A = sp.diags( + diagonals=[lower, main, upper], + offsets=[-1, 0, 1], + format="csc" + ) + b_array = [] + if nbatches == 0: + b = np.random.standard_normal(n) + b_array.append(b) + else: + for i in range(nbatches): + b = np.cos(2*i/(nbatches-1)*np.pi) * np.random.standard_normal(n) + b_array.append(b) + b = batch_vectors_as_matrix(b_array) + solver = sp.linalg.splu(A) + x = solver.solve(b) + return (A, b, x, b_array) + +random_system = make_random_sparse_system(SPARSE_SYSTEM_SIZE, 0) +tridiag_system = make_tridiagonal_system(SPARSE_SYSTEM_SIZE, 0) + +@pytest.mark.parametrize("test_solver,test_context", PARAMS) +@pytest.mark.parametrize("sparse_system", [random_system, tridiag_system]) +def test_vector_solve(test_solver, test_context, sparse_system): + A_sp, b_sp, x_sp, _ = sparse_system + np.testing.assert_allclose(b_sp, A_sp@x_sp, + rtol = REL_TOL, atol = ABS_TOL) #Verify Scipy result + if "Cpu" in str(test_context): + A = test_context.splike_lib.sparse.csc_matrix(A_sp) + if "Cupy" in str(test_context): + A = test_context.splike_lib.sparse.csr_matrix(A_sp) + b_test = test_context.nparray_to_context_array(b_sp) + b = test_context.nplike_lib.asfortranarray(b_test) + solver = xo.sparse.factorized_sparse_solver(A, + force_solver = test_solver, + context = test_context + ) + x = solver.solve(b) + + xo.assert_allclose(x_sp, x, rtol = REL_TOL, atol = ABS_TOL) + xo.assert_allclose(b, A@x, rtol = REL_TOL, atol = ABS_TOL) + +random_system = make_random_sparse_system(SPARSE_SYSTEM_SIZE, NUM_BATCHES) +tridiag_system = make_tridiagonal_system(SPARSE_SYSTEM_SIZE, NUM_BATCHES) + +@pytest.mark.parametrize("test_solver,test_context", PARAMS) +@pytest.mark.parametrize("sparse_system", [random_system, tridiag_system]) +def test_batched_solve(test_solver, test_context, sparse_system): + A_sp, b_sp, x_sp, _ = sparse_system + np.testing.assert_allclose(b_sp, A_sp@x_sp, + rtol = REL_TOL, atol = ABS_TOL) #Verify Scipy result + if "Cpu" in str(test_context): + A = test_context.splike_lib.sparse.csc_matrix(A_sp) + if "Cupy" in str(test_context): + A = test_context.splike_lib.sparse.csr_matrix(A_sp) + b_test = test_context.nparray_to_context_array(b_sp) + b = test_context.nplike_lib.asfortranarray(b_test) + solver = xo.sparse.factorized_sparse_solver(A, + n_batches = NUM_BATCHES, + force_solver = test_solver, + context = test_context + ) + x = solver.solve(b) + + xo.assert_allclose(x_sp, x, rtol = REL_TOL, atol = ABS_TOL) + xo.assert_allclose(b, A@x, rtol = REL_TOL, atol = ABS_TOL) \ No newline at end of file From 4d455d9dc6e343cd1e896846e6d13a8c9e85f8d9 Mon Sep 17 00:00:00 2001 From: Evangelos Katralis Date: Tue, 25 Nov 2025 14:55:32 +0100 Subject: [PATCH 12/28] Stop warning when using force_solver --- xobjects/sparse/sparse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xobjects/sparse/sparse.py b/xobjects/sparse/sparse.py index c4cf5ea..e844940 100644 --- a/xobjects/sparse/sparse.py +++ b/xobjects/sparse/sparse.py @@ -51,7 +51,7 @@ def factorized_sparse_solver(A: Union[scipy.sparse.csr_matrix, if 'permc_spec' not in solverKwargs: solverKwargs = solverKwargs | {"permc_spec":"MMD_AT_PLUS_A"} if force_solver is None or force_solver == "scipySLU": - if A.shape[0]*n_batches < 10**5: + if A.shape[0]*n_batches < 10**5 and force_solver is None: import warnings warnings.warn("For small matrices, using PyKLU " "can provide improved performance") From aae4d2fe871fd351864356bbe17acd2976e550f2 Mon Sep 17 00:00:00 2001 From: Evangelos Katralis Date: Tue, 25 Nov 2025 14:56:03 +0100 Subject: [PATCH 13/28] Attempt to fix deallocation exception in cuDSS solver --- xobjects/sparse/solvers/CUDA/_cuDSSLU.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/xobjects/sparse/solvers/CUDA/_cuDSSLU.py b/xobjects/sparse/solvers/CUDA/_cuDSSLU.py index 018ddc2..3ca7bfa 100644 --- a/xobjects/sparse/solvers/CUDA/_cuDSSLU.py +++ b/xobjects/sparse/solvers/CUDA/_cuDSSLU.py @@ -95,4 +95,7 @@ def close(self): try: self.__exit__(None, None, None) # DirectSolver is context-manageable except Exception: - pass \ No newline at end of file + pass + + def __dealloc__(self): + self.close() \ No newline at end of file From 30babfc02cab1b8c10b314eee48e4debbfc40b5e Mon Sep 17 00:00:00 2001 From: Evangelos Katralis Date: Thu, 27 Nov 2025 14:44:14 +0100 Subject: [PATCH 14/28] Clean up sparse API --- xobjects/sparse/{sparse.py => _sparse.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename xobjects/sparse/{sparse.py => _sparse.py} (100%) diff --git a/xobjects/sparse/sparse.py b/xobjects/sparse/_sparse.py similarity index 100% rename from xobjects/sparse/sparse.py rename to xobjects/sparse/_sparse.py From 2c64edbb0300681be4d95ad9ff7f4a3dc42fc88a Mon Sep 17 00:00:00 2001 From: Evangelos Katralis Date: Thu, 27 Nov 2025 15:11:00 +0100 Subject: [PATCH 15/28] Refactor sparse package structure --- xobjects/sparse/__init__.py | 2 +- xobjects/sparse/_sparse.py | 2 +- xobjects/sparse/solvers/CPU/__init__.py | 7 ++++++- xobjects/sparse/solvers/CUDA/__init__.py | 9 ++++++--- xobjects/sparse/solvers/__init__.py | 2 +- 5 files changed, 15 insertions(+), 7 deletions(-) diff --git a/xobjects/sparse/__init__.py b/xobjects/sparse/__init__.py index b10286c..2fa42ef 100644 --- a/xobjects/sparse/__init__.py +++ b/xobjects/sparse/__init__.py @@ -1,3 +1,3 @@ -from .sparse import factorized_sparse_solver +from ._sparse import factorized_sparse_solver from . import solvers __all__ = ["factorized_sparse_solver","solvers"] diff --git a/xobjects/sparse/_sparse.py b/xobjects/sparse/_sparse.py index e844940..14e65b1 100644 --- a/xobjects/sparse/_sparse.py +++ b/xobjects/sparse/_sparse.py @@ -57,7 +57,7 @@ def factorized_sparse_solver(A: Union[scipy.sparse.csr_matrix, "can provide improved performance") solver = scipy.sparse.linalg.splu(A.tocsc(),**solverKwargs) elif force_solver == "PyKLU": - import PyKLU.klu as PyKLU + import PyKLU solver = PyKLU.Klu(A.tocsc()) else: raise ValueError("Unrecognized CPU Sparse solver. Available options: " diff --git a/xobjects/sparse/solvers/CPU/__init__.py b/xobjects/sparse/solvers/CPU/__init__.py index 3c30baf..e6b243d 100644 --- a/xobjects/sparse/solvers/CPU/__init__.py +++ b/xobjects/sparse/solvers/CPU/__init__.py @@ -1,2 +1,7 @@ from scipy.sparse.linalg import splu as scipySuperLU -__all__ = ["scipySuperLU"] \ No newline at end of file +__all__ = ["scipySuperLU"] +try: + from PyKLU import Klu as KLUSuperLU + __all__.append("KLUSuperLU") +except ImportError: + pass \ No newline at end of file diff --git a/xobjects/sparse/solvers/CUDA/__init__.py b/xobjects/sparse/solvers/CUDA/__init__.py index 20f7537..d1d8789 100644 --- a/xobjects/sparse/solvers/CUDA/__init__.py +++ b/xobjects/sparse/solvers/CUDA/__init__.py @@ -2,12 +2,15 @@ try: from ._cuDSSLU import DirectSolverSuperLU as cuDSSSuperLU __all__.append("cuDSSSuperLU") -except: +except (ModuleNotFoundError,ImportError): pass try: from ._luLU import luLU as CachedSuperLU __all__.append("CachedSuperLU") from cupyx.scipy.sparse.linalg import splu as CupySuperLU __all__.append("CupySuperLU") -except: - pass \ No newline at end of file +except (ModuleNotFoundError,ImportError): + pass + +if not __all__: + raise ImportError("Failed to import any CUDA-based sparse solver") \ No newline at end of file diff --git a/xobjects/sparse/solvers/__init__.py b/xobjects/sparse/solvers/__init__.py index c2201f6..6804061 100644 --- a/xobjects/sparse/solvers/__init__.py +++ b/xobjects/sparse/solvers/__init__.py @@ -3,5 +3,5 @@ try: from . import CUDA __all__.append("CUDA") -except: +except ImportError: pass From c9a6f4a18346061fc9a992d84718ab60fd7f882d Mon Sep 17 00:00:00 2001 From: Evangelos Katralis Date: Thu, 27 Nov 2025 16:11:34 +0100 Subject: [PATCH 16/28] Make contexts give error when unavailable --- xobjects/context_cupy.py | 5 +++++ xobjects/context_pyopencl.py | 7 ++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/xobjects/context_cupy.py b/xobjects/context_cupy.py index 3838376..651cae5 100644 --- a/xobjects/context_cupy.py +++ b/xobjects/context_cupy.py @@ -398,6 +398,11 @@ def __init__( default_shared_mem_size_bytes=0, device=None, ): + if not _enabled: + raise ModuleNotAvailable( + "cupy is not installed. " "ContextCupy is not available!" + ) + if device is not None: cupy.cuda.Device(device).use() diff --git a/xobjects/context_pyopencl.py b/xobjects/context_pyopencl.py index c61e21e..583394c 100644 --- a/xobjects/context_pyopencl.py +++ b/xobjects/context_pyopencl.py @@ -126,7 +126,12 @@ def __init__( ContextPyopencl: context object. """ - + + if not _enabled: + raise ModuleNotAvailable( + "pyopencl is not installed. ContextPyopencl is not available!" + ) + super().__init__() # TODO assume one device only From 82e5dbec69712efef4e17ce465c828561a6611a7 Mon Sep 17 00:00:00 2001 From: Evangelos Katralis Date: Thu, 27 Nov 2025 16:19:33 +0100 Subject: [PATCH 17/28] Add ModuleNotAvailableError for unavailable optional modules --- xobjects/context.py | 4 ++++ xobjects/context_cupy.py | 3 ++- xobjects/context_pyopencl.py | 5 +++-- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/xobjects/context.py b/xobjects/context.py index effd95a..bbd57de 100644 --- a/xobjects/context.py +++ b/xobjects/context.py @@ -203,6 +203,10 @@ def __init__(self, message="Module not available"): def __getattr__(self, attr): raise NameError(self.message) +class ModuleNotAvailableError(ModuleNotFoundError): + """Raised when an optional dependency is missing.""" + pass + class XContext(ABC): minimum_alignment = 1 diff --git a/xobjects/context_cupy.py b/xobjects/context_cupy.py index 651cae5..d1b3b87 100644 --- a/xobjects/context_cupy.py +++ b/xobjects/context_cupy.py @@ -18,6 +18,7 @@ classes_from_kernels, sort_classes, sources_from_classes, + ModuleNotAvailableError, ) from .linkedarray import BaseLinkedArray from .specialize_source import specialize_source @@ -399,7 +400,7 @@ def __init__( device=None, ): if not _enabled: - raise ModuleNotAvailable( + raise ModuleNotAvailableError( "cupy is not installed. " "ContextCupy is not available!" ) diff --git a/xobjects/context_pyopencl.py b/xobjects/context_pyopencl.py index 583394c..196eb17 100644 --- a/xobjects/context_pyopencl.py +++ b/xobjects/context_pyopencl.py @@ -18,6 +18,7 @@ classes_from_kernels, sort_classes, sources_from_classes, + ModuleNotAvailableError, ) from .linkedarray import BaseLinkedArray from .specialize_source import specialize_source @@ -126,9 +127,9 @@ def __init__( ContextPyopencl: context object. """ - + if not _enabled: - raise ModuleNotAvailable( + raise ModuleNotAvailableError( "pyopencl is not installed. ContextPyopencl is not available!" ) From 5bc9f99a7784d3919267106429c44259f767fa31 Mon Sep 17 00:00:00 2001 From: Evangelos Katralis Date: Thu, 27 Nov 2025 16:30:56 +0100 Subject: [PATCH 18/28] Improve tests for sparse solvers. Now using relative residuals --- tests/test_sparse_solvers.py | 84 ++++++++++++++++++++++++-------- xobjects/sparse/_test_helpers.py | 56 +++++++++++++++++++++ 2 files changed, 119 insertions(+), 21 deletions(-) create mode 100644 xobjects/sparse/_test_helpers.py diff --git a/tests/test_sparse_solvers.py b/tests/test_sparse_solvers.py index 40779ce..b462b62 100644 --- a/tests/test_sparse_solvers.py +++ b/tests/test_sparse_solvers.py @@ -2,20 +2,58 @@ import scipy.sparse as sp import xobjects as xo from xobjects.test_helpers import fix_random_seed +from xobjects.sparse import _test_helpers as sptest +from xobjects.context import ModuleNotAvailableError +import warnings import pytest -PARAMS = [ - ("scipySLU", xo.ContextCpu()), - ("PyKLU", xo.ContextCpu()), - ("cuDSS", xo.ContextCupy()), - ("CachedSLU", xo.ContextCupy()), - ("cupySLU", xo.ContextCupy()), -] - -REL_TOL = 1e-4 -ABS_TOL = 1e-8 -SPARSE_SYSTEM_SIZE = 5000 # (n,n) matrix +''' +The following tests rely on computing the relative residual of the solution +The relative residual can be defined as: + || A * x - b || + η = --------------------- + ||A||*||x|| + ||b|| + +Typically, the expected value for this quantity is: +* Ideally: 1e-12 - 1e-14 +* Ill-conditioned systems: 1e-9 - 1e-10 + +In this module, we evaluate the residual as follows: +* We compare the residual of the reference solver (scipy) with ABS_TOL +* We compare the residual of the KLU solver with ABS_TOL +* We ensure that the residual of the KLU Solver is within TOLERANCE_FACTOR + of the reference solver. + +For reference, the machine precision for FP64 is ~2.2e-16 (PRECISION) + +Note: The completely random sparse system is more prone to failing the tests +due to numerical noise, often requires looser tolerances. Still worth including +but if testing larger systems, could potentially be omitted. +''' + +cpu_tests = [ + ("scipySLU", xo.ContextCpu()), + ("PyKLU", xo.ContextCpu()), + ] + +cupy_tests = [] +try: + cupy_tests = [ + ("cuDSS", xo.ContextCupy()), + ("CachedSLU", xo.ContextCupy()), + ("cupySLU", xo.ContextCupy()), + ] +except ModuleNotAvailableError: + warnings.warn("!!!ContextCupy unavailable. " + "Skipping tests for Cupy Solvers!!!") + +PARAMS = cpu_tests + cupy_tests + +SPARSE_SYSTEM_SIZE = 2000 # (n,n) matrix NUM_BATCHES = 10 +PRECISION = np.finfo(float).eps +ABS_TOL = 1e-12 +TOLERANCE_FACTOR = 2 def batch_vectors_as_matrix(vector_list): return np.asfortranarray(np.moveaxis(np.array(vector_list),0,-1)) @@ -75,8 +113,10 @@ def make_tridiagonal_system(n, nbatches): @pytest.mark.parametrize("sparse_system", [random_system, tridiag_system]) def test_vector_solve(test_solver, test_context, sparse_system): A_sp, b_sp, x_sp, _ = sparse_system - np.testing.assert_allclose(b_sp, A_sp@x_sp, - rtol = REL_TOL, atol = ABS_TOL) #Verify Scipy result + assert not sptest.issymmetric(A_sp) + + scipy_residual = sptest.rel_residual(A_sp,x_sp,b_sp) + if "Cpu" in str(test_context): A = test_context.splike_lib.sparse.csc_matrix(A_sp) if "Cupy" in str(test_context): @@ -88,9 +128,10 @@ def test_vector_solve(test_solver, test_context, sparse_system): context = test_context ) x = solver.solve(b) - - xo.assert_allclose(x_sp, x, rtol = REL_TOL, atol = ABS_TOL) - xo.assert_allclose(b, A@x, rtol = REL_TOL, atol = ABS_TOL) + + solver_residual = sptest.rel_residual(A,x,b) + sptest.assert_residual_ok(scipy_residual,solver_residual, + abs_tol = ABS_TOL, factor = TOLERANCE_FACTOR) random_system = make_random_sparse_system(SPARSE_SYSTEM_SIZE, NUM_BATCHES) tridiag_system = make_tridiagonal_system(SPARSE_SYSTEM_SIZE, NUM_BATCHES) @@ -99,8 +140,8 @@ def test_vector_solve(test_solver, test_context, sparse_system): @pytest.mark.parametrize("sparse_system", [random_system, tridiag_system]) def test_batched_solve(test_solver, test_context, sparse_system): A_sp, b_sp, x_sp, _ = sparse_system - np.testing.assert_allclose(b_sp, A_sp@x_sp, - rtol = REL_TOL, atol = ABS_TOL) #Verify Scipy result + assert not sptest.issymmetric(A_sp) + scipy_residual = sptest.rel_residual(A_sp,x_sp,b_sp) if "Cpu" in str(test_context): A = test_context.splike_lib.sparse.csc_matrix(A_sp) if "Cupy" in str(test_context): @@ -113,6 +154,7 @@ def test_batched_solve(test_solver, test_context, sparse_system): context = test_context ) x = solver.solve(b) - - xo.assert_allclose(x_sp, x, rtol = REL_TOL, atol = ABS_TOL) - xo.assert_allclose(b, A@x, rtol = REL_TOL, atol = ABS_TOL) \ No newline at end of file + + solver_residual = sptest.rel_residual(A,x,b) + sptest.assert_residual_ok(scipy_residual,solver_residual, + abs_tol = ABS_TOL, factor = TOLERANCE_FACTOR) \ No newline at end of file diff --git a/xobjects/sparse/_test_helpers.py b/xobjects/sparse/_test_helpers.py new file mode 100644 index 0000000..7b82663 --- /dev/null +++ b/xobjects/sparse/_test_helpers.py @@ -0,0 +1,56 @@ +import numpy.linalg as npl +import scipy.sparse.linalg as scspl +from scipy.sparse import issparse + +def issymmetric(A, tol=0): + if A.shape[0] != A.shape[1]: + return False + diff = A - A.T + if tol == 0: + return diff.nnz == 0 + else: + # tolerance-based check + return abs(diff).max() <= tol + +def rel_residual(A,x,b): + if hasattr(A, "get"): + A = A.get() + if hasattr(x, "get"): + x = x.get() + if hasattr(b, "get"): + b = b.get() + assert issparse(A), ("A must be a sparse matrix") + + return npl.norm(A@x - b) / (scspl.norm(A)*npl.norm(x) + npl.norm(b)) + +def assert_residual_ok(res_ref, res_solver, + abs_tol=1e-12, + factor=10): + """ + Check that our solver's residual is both: + - absolutely small enough (abs_tol), + - not catastrophically worse than the reference (factor * res_ref). + """ + # sanity: reference solver itself should be good + assert res_ref < abs_tol, f"Reference residual too large: {res_ref}" + + # absolute bound + assert res_solver < abs_tol, ( + f"Residual {res_solver} exceeds absolute tolerance {abs_tol}" + ) + + # relative bound vs reference + assert res_solver <= factor * res_ref, ( + f"Residual {res_solver} not within factor {factor} of " + f"reference residual {res_ref}" + ) + +# ---- Unused rn, but could be useful for tests ---- + +def assert_close_to_precision(value, precision): + assert value <= precision, (f"Value {value} not within precision {precision}") + +def assert_residual_close(reference_residual, residual, tolerance = 10): + assert residual <= tolerance*reference_residual, ( + f"Residual {residual} not within tolerance " + f"O({tolerance}) of reference residual {reference_residual}") \ No newline at end of file From 2bd4a14296d6f975094c429989ef005e2d3ff523 Mon Sep 17 00:00:00 2001 From: Evangelos Katralis Date: Tue, 2 Dec 2025 10:33:40 +0100 Subject: [PATCH 19/28] Fix relative residual formula in tests --- tests/test_sparse_solvers.py | 2 +- xobjects/sparse/_test_helpers.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_sparse_solvers.py b/tests/test_sparse_solvers.py index b462b62..e490335 100644 --- a/tests/test_sparse_solvers.py +++ b/tests/test_sparse_solvers.py @@ -12,7 +12,7 @@ The relative residual can be defined as: || A * x - b || η = --------------------- - ||A||*||x|| + ||b|| + ||b|| Typically, the expected value for this quantity is: * Ideally: 1e-12 - 1e-14 diff --git a/xobjects/sparse/_test_helpers.py b/xobjects/sparse/_test_helpers.py index 7b82663..313aa39 100644 --- a/xobjects/sparse/_test_helpers.py +++ b/xobjects/sparse/_test_helpers.py @@ -21,7 +21,7 @@ def rel_residual(A,x,b): b = b.get() assert issparse(A), ("A must be a sparse matrix") - return npl.norm(A@x - b) / (scspl.norm(A)*npl.norm(x) + npl.norm(b)) + return npl.norm(A@x - b) / (npl.norm(b)) def assert_residual_ok(res_ref, res_solver, abs_tol=1e-12, From 8c121048b6ca6b0fc454378bbdc36d316b9cfdcc Mon Sep 17 00:00:00 2001 From: Evangelos Katralis Date: Tue, 2 Dec 2025 11:38:40 +0100 Subject: [PATCH 20/28] Make sparse solver choice verbose and document --- xobjects/sparse/_sparse.py | 251 +++++++++++++++++++++++++++++++++---- 1 file changed, 225 insertions(+), 26 deletions(-) diff --git a/xobjects/sparse/_sparse.py b/xobjects/sparse/_sparse.py index 14e65b1..dc68340 100644 --- a/xobjects/sparse/_sparse.py +++ b/xobjects/sparse/_sparse.py @@ -6,6 +6,8 @@ from ..context_cupy import ContextCupy from ..context_pyopencl import ContextPyopencl from .solvers._abstract_solver import SuperLUlikeSolver +from ..general import _print + try: from cupy import ndarray as cparray import cupyx.scipy.sparse @@ -31,11 +33,184 @@ def factorized_sparse_solver(A: Union[scipy.sparse.csr_matrix, "cupy", "pyopencl"], XContext - ] = None + ] = None, + verbose: bool = False ) -> SuperLUlikeSolver: + """ + Build and return a factorized sparse linear solver on CPU or GPU. + + This function inspects the provided sparse matrix and execution context + and returns a *factorized* solver object (SuperLU-like). The solver can + then be reused to efficiently solve multiple linear systems with the same + matrix `A`. + + The actual backend is chosen automatically based on: + * The requested/derived `context` ("cpu", "cupy", "pyopencl"), + * The availability of optional libraries (PyKLU, cuDSS, CuPy/CUSPARSE), + * And the optional `force_solver` argument. + + On CPU: + * Default: try PyKLU, fall back to `scipy.sparse.linalg.splu`. + * You may force `"scipySLU"` or `"PyKLU"` explicitly. + + On CUDA/CuPy: + * Default: try cuDSS (`DirectSolverSuperLU`), then fall back to: + - `cupyx.scipy.sparse.linalg.splu` if CUSPARSE `csrsm2` is available, or + - a cached SuperLU-based solver (`luLU`) and finally `cupyx.splu`. + * You may force `"cuDSS"`, `"CachedSLU"`, or `"cupySLU"` explicitly. + + PyOpenCL: + * Currently not supported and will raise `NotImplementedError`. + + Parameters + ---------- + A : scipy.sparse.spmatrix or cupyx.scipy.sparse.spmatrix + Sparse system matrix to factorize. + + The matrix is internally converted to the format expected by the + chosen backend: + + * CPU context: converted to CSC (`scipy.sparse.csc_matrix`). + * CuPy/GPU context: converted to CSR (`cupyx.scipy.sparse.csr_matrix`). + + For **best performance**, you should pass `A` already in the + preferred format to avoid extra conversions: + + * If the context is (or will usually be) **CPU**, provide `A` + as a CSC matrix (`A.tocsc()`). + * If the context is **GPU/CuPy**, provide `A` as a CSR matrix + (`A.tocsr()` or `cupyx.scipy.sparse.csr_matrix`). + + If `context` is `None`, it is still inferred from the type of `A` + and the availability of CuPy, e.g.: + + * SciPy sparse or NumPy array → `"cpu"`, + * CuPy sparse or CuPy array → `"cupy"`, + * otherwise a `TypeError` is raised. + + n_batches : int, optional + Controls the expected shape of the right-hand side (RHS) for GPU + solvers and hence whether solves are treated as single or batched: + + * If ``n_batches == 0`` (default), the solver is configured for + single-RHS solves and expects a vector RHS of shape ``(n,)``. + * If ``n_batches > 0``, the solver is configured for batched solves + and expects a 2D RHS array of shape ``(n, n_batches)`` (i.e. + ``nrhs = n_batches``). + + This argument is primarily used by CUDA-based solvers (e.g. cuDSS and + cached SuperLU) to preconfigure internal data structures for batched + solves. It has no effect for CPU-based solvers. + + force_solver : {"scipySLU", "PyKLU", "cuDSS", "CachedSLU", "cupySLU"}, optional + If provided, forces the use of a specific backend instead of the + automatic selection: + + * `"scipySLU"` : Use `scipy.sparse.linalg.splu` (CPU). + * `"PyKLU"` : Use the `PyKLU.Klu` solver (CPU). + * `"cuDSS"` : Use CUDA/cuDSS-based `DirectSolverSuperLU` (GPU). + * `"CachedSLU"`: Use CUDA cached SuperLU (`luLU`) (GPU). + * `"cupySLU"` : Use `cupyx.scipy.sparse.linalg.splu` (GPU). + + Using a solver that does not match the current `context` will result + in a `ValueError`. + + solverKwargs : dict, optional + Extra keyword arguments forwarded to the underlying solver constructor. + If `None`, an empty dict is used. + + Some backends make use of `permc_spec` (matrix permutation strategy). + When not explicitly provided and appropriate, this function sets + `permc_spec="MMD_AT_PLUS_A"` as a sensible default for the matrices that + will typically be encountered in an xobjects workflow. + + context : {"cpu", "cupy", "pyopencl"} or XContext, optional + Execution context. Can be either: + + * A string: + - `"cpu"`: Use CPU-based solvers (SciPy / PyKLU). + - `"cupy"`: Use CuPy/CUDA-based solvers. + - `"pyopencl"`: PyOpenCL context (currently unsupported). + * A context object instance: + - `ContextCpu` + - `ContextCupy` + - `ContextPyopencl` + + If `None`, the context is inferred from `A` as described above. + + verbose : bool, optional + If `True`, prints debug messages describing the solver-selection + process, fallbacks, and the final solver that is returned. + + Returns + ------- + SuperLUlikeSolver + A factorized solver object compatible with SciPy’s `splu`-like + interface (i.e. typically exposing a `solve` method and related + accessors). The exact concrete type depends on the backend: + * CPU: + - `scipy.sparse.linalg.SuperLU` (for `"scipySLU"`), + - `PyKLU.Klu` (for `"PyKLU"`). + * CUDA/CuPy: + - `DirectSolverSuperLU` (cuDSS), + - `luLU` (cached SuperLU), + - `cupyx.scipy.sparse.linalg.SuperLU` (for `"cupySLU"`). + + Raises + ------ + TypeError + If the type of `A` is unsupported when inferring the context. + + AssertionError + If `A` does not match the required type for the chosen context + + ModuleNotFoundError + If a requested solver backend depends on a module that is not + installed (e.g. CuPy, PyKLU, cuDSS), and no fallback is available. + + RuntimeError + If a requested GPU solver fails during initialization. + + NotImplementedError + If `context` is `"pyopencl"` or `ContextPyopencl`, since no sparse + solver is currently implemented for that backend. + + ValueError + If an invalid `context` string is provided, or if `force_solver` + does not match any known solver for the active context. + + Notes + ----- + - For best performance on CPU, PyKLU is preferred when available. + - For CuPy/CUDA, cuDSS is preferred when available, and this function + will automatically fall back to other solvers if cuDSS is not present + or fails at runtime. + - The returned solver is *factorized* and should be reused to solve + multiple right-hand sides efficiently. + + Examples + -------- + Factorize a SciPy sparse matrix on CPU with automatic solver selection: + + >>> A = scipy.sparse.random(1000, 1000, density=0.01, format="csr") + >>> solver = factorized_sparse_solver(A) + >>> x = solver.solve(b) + + Explicitly request the SciPy SuperLU solver on CPU: + + >>> solver = factorized_sparse_solver(A, force_solver="scipySLU") + + Using CuPy and cuDSS (requires CuPy and cuDSS bindings): + + >>> A_gpu = cupyx.scipy.sparse.csr_matrix(A) + >>> solver = factorized_sparse_solver(A_gpu, context="cupy") + >>> x_gpu = solver.solve(b_gpu) + """ if solverKwargs is None: solverKwargs = {} if context is None: + dbugprint(verbose, "No context provided. " \ + "Context will be inferred from matrix") if isinstance(A, scipy.sparse.spmatrix) or isinstance(A, nparray): context = 'cpu' elif (_cupy_available and @@ -48,17 +223,28 @@ def factorized_sparse_solver(A: Union[scipy.sparse.csr_matrix, assert isinstance(A, scipy.sparse.spmatrix), ( "When using CPU context A must be a scipy.sparse matrix" ) + A = A.tocsc() # CPU Solvers require csc format if 'permc_spec' not in solverKwargs: solverKwargs = solverKwargs | {"permc_spec":"MMD_AT_PLUS_A"} - if force_solver is None or force_solver == "scipySLU": - if A.shape[0]*n_batches < 10**5 and force_solver is None: - import warnings - warnings.warn("For small matrices, using PyKLU " - "can provide improved performance") - solver = scipy.sparse.linalg.splu(A.tocsc(),**solverKwargs) + if force_solver is None: + dbugprint(verbose, "No solver requested. " \ + "Picking best solver for CPU Context") + try: + dbugprint(verbose, "Attempting to use PyKLU") + import PyKLU + solver = PyKLU.Klu(A) + dbugprint(verbose, "PyKLU succeeded") + except (ModuleNotFoundError, RuntimeError) as e: + dbugprint(verbose, "PyKLU failed. " \ + "Falling back to scipy.splu \n" + f"Encountered error: {e}") + + solver = scipy.sparse.linalg.splu(A,**solverKwargs) + elif force_solver == "scipySLU": + solver = scipy.sparse.linalg.splu(A,**solverKwargs) elif force_solver == "PyKLU": import PyKLU - solver = PyKLU.Klu(A.tocsc()) + solver = PyKLU.Klu(A) else: raise ValueError("Unrecognized CPU Sparse solver. Available options: " "scipySLU, PyKLU") @@ -67,36 +253,44 @@ def factorized_sparse_solver(A: Union[scipy.sparse.csr_matrix, if not _cupy_available: raise ModuleNotFoundError("No cupy module found. " \ "ContextCupy unavailable") - assert isinstance(A ,cupyx.scipy.sparse.csr_matrix), ( + assert isinstance(A ,cupyx.scipy.sparse.spmatrix), ( "When using ContextCupy, input must be " - "cupyx.scipy.sparse.csr_matrix") - + "cupyx.scipy.sparse matrix") + + A = A.tocsr() # GPU solvers require csr format if force_solver is not None and force_solver != "cuDSS": if 'permc_spec' not in solverKwargs: solverKwargs = solverKwargs | {"permc_spec":"MMD_AT_PLUS_A"} if force_solver is None: + dbugprint(verbose, "No solver requested. " \ + "Picking best solver for Cupy Context") import warnings try: + dbugprint(verbose, "Attempting to use cuDSS Solver") from .solvers.CUDA._cuDSSLU import DirectSolverSuperLU solver = DirectSolverSuperLU(A, n_batches = n_batches, **solverKwargs) + dbugprint(verbose, "cuDSS succeeded") except (ModuleNotFoundError, RuntimeError) as e: - warnings.warn("cuDSS not available. " - "Falling back to Cached-SuperLU (spsm) " - f"Encountered Error: {e}") + dbugprint(verbose, "cuDSS failed. \n" + f"Encountered Error: {e}") + warnings.warn("cuDSS not available. Performance will be degraded") if 'permc_spec' not in solverKwargs: solverKwargs = solverKwargs | {"permc_spec":"MMD_AT_PLUS_A"} - try: - if cusparse.check_availability('csrsm2'): - raise RuntimeError("csrsm2 is avaiable. " - "cupy SuperLU performs better " - "than Cached-SuperLU (spsm)") - from .solvers.CUDA._luLU import luLU - solver = luLU(A, n_batches = n_batches, **solverKwargs) - except RuntimeError as e: - warnings.warn("Cached-SuperLU (spsm) solver failed. " - "Falling back to cupy SuperLU. " - f"Error encountered: {e}") + if cusparse.check_availability('csrsm2'): + dbugprint(verbose, "csrsm2 available. Using cupyx.splu solver") solver = cupyx.scipy.sparse.linalg.splu(A, **solverKwargs) + else: + try: + dbugprint(verbose, "csrms2 unavailable. " \ + "Attempting to use CachedSuperLU (spsm)") + from .solvers.CUDA._luLU import luLU + solver = luLU(A, n_batches = n_batches, **solverKwargs) + dbugprint(verbose, "CachedSuperLU succeeded") + except RuntimeError as e: + dbugprint(verbose, "CachedSuperLU failed. \n" + f"Encountered error: {e} \n" + "Falling back to cupyx.splu with spsm") + solver = cupyx.scipy.sparse.linalg.splu(A, **solverKwargs) elif force_solver == "cuDSS": from .solvers.CUDA._cuDSSLU import DirectSolverSuperLU solver = DirectSolverSuperLU(A, n_batches = n_batches, **solverKwargs) @@ -114,4 +308,9 @@ def factorized_sparse_solver(A: Union[scipy.sparse.csr_matrix, else: raise ValueError("Invalid context. Available contexts are: " \ "cpu, cupy, pyopencl") - return solver \ No newline at end of file + dbugprint(verbose, "Returning solver: " + str(solver)) + return solver + +def dbugprint(verbose: bool, text: str): + if verbose: + _print("[xo.sparse] "+text) \ No newline at end of file From 79bc3499d44f173d87394a36aec55f335b187bf8 Mon Sep 17 00:00:00 2001 From: Evangelos Katralis Date: Tue, 2 Dec 2025 11:58:42 +0100 Subject: [PATCH 21/28] Update solver interface so that it is consistent accross platforms --- xobjects/sparse/solvers/CUDA/__init__.py | 26 ++++++++++++++++-------- xobjects/sparse/solvers/__init__.py | 8 ++------ 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/xobjects/sparse/solvers/CUDA/__init__.py b/xobjects/sparse/solvers/CUDA/__init__.py index d1d8789..c8d6a4d 100644 --- a/xobjects/sparse/solvers/CUDA/__init__.py +++ b/xobjects/sparse/solvers/CUDA/__init__.py @@ -1,16 +1,24 @@ -__all__ = [] +from ....context import ModuleNotAvailableError try: from ._cuDSSLU import DirectSolverSuperLU as cuDSSSuperLU - __all__.append("cuDSSSuperLU") -except (ModuleNotFoundError,ImportError): - pass +except (ModuleNotFoundError,ImportError) as e: + def cuDSSSuperLU(*args, **kwargs): + raise ModuleNotAvailableError( + "cuDSSSuperLU is not available. Could not import required backend." + ) from e try: from ._luLU import luLU as CachedSuperLU - __all__.append("CachedSuperLU") +except (ModuleNotFoundError,ImportError): + def CachedSuperLU(*args, **kwargs): + raise ModuleNotAvailableError( + "CachedSuperLU is not available. Could not import required backend." + ) from e +try: from cupyx.scipy.sparse.linalg import splu as CupySuperLU - __all__.append("CupySuperLU") except (ModuleNotFoundError,ImportError): - pass + def CupySuperLU(*args, **kwargs): + raise ModuleNotAvailableError( + "CupySuperLU is not available. Could not import required backend." + ) from e -if not __all__: - raise ImportError("Failed to import any CUDA-based sparse solver") \ No newline at end of file +__all__ = ["cuDSSSuperLU", "CachedSuperLU", "CupySuperLU"] \ No newline at end of file diff --git a/xobjects/sparse/solvers/__init__.py b/xobjects/sparse/solvers/__init__.py index 6804061..3721b50 100644 --- a/xobjects/sparse/solvers/__init__.py +++ b/xobjects/sparse/solvers/__init__.py @@ -1,7 +1,3 @@ from . import CPU -__all__ = ["CPU"] -try: - from . import CUDA - __all__.append("CUDA") -except ImportError: - pass +from . import CUDA +__all__ = ["CPU", "CUDA"] From 9b2d052c9dc4b2d9b16e1237b78f98d2a46e88d8 Mon Sep 17 00:00:00 2001 From: Evangelos Katralis Date: Tue, 2 Dec 2025 12:12:56 +0100 Subject: [PATCH 22/28] Update import logic and error raising --- xobjects/sparse/solvers/CPU/__init__.py | 13 +++++++++---- xobjects/sparse/solvers/CUDA/__init__.py | 16 ++++++++-------- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/xobjects/sparse/solvers/CPU/__init__.py b/xobjects/sparse/solvers/CPU/__init__.py index e6b243d..a5f6275 100644 --- a/xobjects/sparse/solvers/CPU/__init__.py +++ b/xobjects/sparse/solvers/CPU/__init__.py @@ -1,7 +1,12 @@ from scipy.sparse.linalg import splu as scipySuperLU -__all__ = ["scipySuperLU"] +from ....context import ModuleNotAvailableError + try: from PyKLU import Klu as KLUSuperLU - __all__.append("KLUSuperLU") -except ImportError: - pass \ No newline at end of file +except (ModuleNotFoundError,ImportError) as e: + def KLUSuperLU(*args, _import_err=e, **kwargs): + raise ModuleNotAvailableError( + "KLUSuperLU is not available. Could not import required backend." + ) from _import_err + +__all__ = ["scipySuperLU", "KLUSuperLU"] \ No newline at end of file diff --git a/xobjects/sparse/solvers/CUDA/__init__.py b/xobjects/sparse/solvers/CUDA/__init__.py index c8d6a4d..f969a27 100644 --- a/xobjects/sparse/solvers/CUDA/__init__.py +++ b/xobjects/sparse/solvers/CUDA/__init__.py @@ -2,23 +2,23 @@ try: from ._cuDSSLU import DirectSolverSuperLU as cuDSSSuperLU except (ModuleNotFoundError,ImportError) as e: - def cuDSSSuperLU(*args, **kwargs): + def cuDSSSuperLU(*args, _import_err=e, **kwargs): raise ModuleNotAvailableError( "cuDSSSuperLU is not available. Could not import required backend." - ) from e + ) from _import_err try: from ._luLU import luLU as CachedSuperLU -except (ModuleNotFoundError,ImportError): - def CachedSuperLU(*args, **kwargs): +except (ModuleNotFoundError,ImportError) as e: + def CachedSuperLU(*args, _import_err=e, **kwargs): raise ModuleNotAvailableError( "CachedSuperLU is not available. Could not import required backend." - ) from e + ) from _import_err try: from cupyx.scipy.sparse.linalg import splu as CupySuperLU -except (ModuleNotFoundError,ImportError): - def CupySuperLU(*args, **kwargs): +except (ModuleNotFoundError,ImportError) as e: + def CupySuperLU(*args, _import_err=e, **kwargs): raise ModuleNotAvailableError( "CupySuperLU is not available. Could not import required backend." - ) from e + ) from _import_err __all__ = ["cuDSSSuperLU", "CachedSuperLU", "CupySuperLU"] \ No newline at end of file From c225bd846175af63fa9927e0a9446e976bdca82e Mon Sep 17 00:00:00 2001 From: Evangelos Katralis Date: Tue, 2 Dec 2025 12:50:52 +0100 Subject: [PATCH 23/28] Update documentation and naming --- xobjects/sparse/_sparse.py | 16 ++++++++++++++-- xobjects/sparse/solvers/CPU/__init__.py | 13 ++++++------- xobjects/sparse/solvers/CUDA/__init__.py | 24 +++++++++++++----------- 3 files changed, 33 insertions(+), 20 deletions(-) diff --git a/xobjects/sparse/_sparse.py b/xobjects/sparse/_sparse.py index dc68340..0d7301c 100644 --- a/xobjects/sparse/_sparse.py +++ b/xobjects/sparse/_sparse.py @@ -109,7 +109,7 @@ def factorized_sparse_solver(A: Union[scipy.sparse.csr_matrix, * `"scipySLU"` : Use `scipy.sparse.linalg.splu` (CPU). * `"PyKLU"` : Use the `PyKLU.Klu` solver (CPU). * `"cuDSS"` : Use CUDA/cuDSS-based `DirectSolverSuperLU` (GPU). - * `"CachedSLU"`: Use CUDA cached SuperLU (`luLU`) (GPU). + * `"CachedSLU"`: Use CUDA cached spsm SuperLU (`luLU`) (GPU). * `"cupySLU"` : Use `cupyx.scipy.sparse.linalg.splu` (GPU). Using a solver that does not match the current `context` will result @@ -153,8 +153,20 @@ def factorized_sparse_solver(A: Union[scipy.sparse.csr_matrix, - `PyKLU.Klu` (for `"PyKLU"`). * CUDA/CuPy: - `DirectSolverSuperLU` (cuDSS), - - `luLU` (cached SuperLU), + - `luLU` (cached spsm SuperLU), - `cupyx.scipy.sparse.linalg.SuperLU` (for `"cupySLU"`). + + All returned solver objects implement a `solve(b)` method. + For **optimal performance across all backends**, the right-hand + side `b` should be passed as a **Fortran-contiguous (column-major)** + array: + + * For a single RHS: shape ``(n,)`` or ``(n, 1)`` (Fortran contiguous). + * For multiple RHSs: shape ``(n, nrhs)`` with **Fortran layout**. + + If `b` is not Fortran-contiguous, the solver will internally copy or + transform it, which can incur extra overhead—especially on CUDA/GPU + backends and in batched-solve scenarios. Raises ------ diff --git a/xobjects/sparse/solvers/CPU/__init__.py b/xobjects/sparse/solvers/CPU/__init__.py index a5f6275..07b2c79 100644 --- a/xobjects/sparse/solvers/CPU/__init__.py +++ b/xobjects/sparse/solvers/CPU/__init__.py @@ -1,12 +1,11 @@ -from scipy.sparse.linalg import splu as scipySuperLU -from ....context import ModuleNotAvailableError - +from scipy.sparse.linalg import splu as scipysplu try: - from PyKLU import Klu as KLUSuperLU + from PyKLU import Klu as KLU except (ModuleNotFoundError,ImportError) as e: - def KLUSuperLU(*args, _import_err=e, **kwargs): + def KLU(*args, _import_err=e, **kwargs): + from ....context import ModuleNotAvailableError raise ModuleNotAvailableError( - "KLUSuperLU is not available. Could not import required backend." + "KLU is not available. Could not import required backend." ) from _import_err -__all__ = ["scipySuperLU", "KLUSuperLU"] \ No newline at end of file +__all__ = ["scipysplu", "KLU"] \ No newline at end of file diff --git a/xobjects/sparse/solvers/CUDA/__init__.py b/xobjects/sparse/solvers/CUDA/__init__.py index f969a27..536fe49 100644 --- a/xobjects/sparse/solvers/CUDA/__init__.py +++ b/xobjects/sparse/solvers/CUDA/__init__.py @@ -1,24 +1,26 @@ -from ....context import ModuleNotAvailableError try: - from ._cuDSSLU import DirectSolverSuperLU as cuDSSSuperLU + from ._cuDSSLU import DirectSolverSuperLU as cuDSS except (ModuleNotFoundError,ImportError) as e: - def cuDSSSuperLU(*args, _import_err=e, **kwargs): + def cuDSS(*args, _import_err=e, **kwargs): + from ....context import ModuleNotAvailableError raise ModuleNotAvailableError( - "cuDSSSuperLU is not available. Could not import required backend." + "cuDSS is not available. Could not import required backend." ) from _import_err try: - from ._luLU import luLU as CachedSuperLU + from ._luLU import luLU as cachedSpSM except (ModuleNotFoundError,ImportError) as e: - def CachedSuperLU(*args, _import_err=e, **kwargs): + def cachedSpSM(*args, _import_err=e, **kwargs): + from ....context import ModuleNotAvailableError raise ModuleNotAvailableError( - "CachedSuperLU is not available. Could not import required backend." + "cachedSpSM is not available. Could not import required backend." ) from _import_err try: - from cupyx.scipy.sparse.linalg import splu as CupySuperLU + from cupyx.scipy.sparse.linalg import splu as cupysplu except (ModuleNotFoundError,ImportError) as e: - def CupySuperLU(*args, _import_err=e, **kwargs): + def cupysplu(*args, _import_err=e, **kwargs): + from ....context import ModuleNotAvailableError raise ModuleNotAvailableError( - "CupySuperLU is not available. Could not import required backend." + "cupysplu is not available. Could not import required backend." ) from _import_err -__all__ = ["cuDSSSuperLU", "CachedSuperLU", "CupySuperLU"] \ No newline at end of file +__all__ = ["cuDSS", "cachedSpSM", "cupysplu"] \ No newline at end of file From 031098195c67f9c60e6f9c0fe96010666641437f Mon Sep 17 00:00:00 2001 From: Evangelos Katralis Date: Tue, 2 Dec 2025 13:06:51 +0100 Subject: [PATCH 24/28] Add example for sparse solvers --- examples/solve_sparse_system.py | 74 +++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 examples/solve_sparse_system.py diff --git a/examples/solve_sparse_system.py b/examples/solve_sparse_system.py new file mode 100644 index 0000000..4e02673 --- /dev/null +++ b/examples/solve_sparse_system.py @@ -0,0 +1,74 @@ +import xobjects as xo +import scipy.sparse as sp +import numpy as np +from xobjects.sparse._test_helpers import rel_residual + +''' +The goal of this example is to provide a short user guide for the xo.sparse module. + +The xo.sparse module can be used to solve sparse linear systems of equations: + A*x = b +where A is a sparse matrix. + +Currently this module only supports CPU and Cupy contexts. This module contains a +variety of solvers for different contexts, with consistent APIs. The intended use +is to reuse the same LHS for many solves, so the solvers work as follows: + +solver(A) # Performs decomposition/factorization +solver.solve(b) # Solves Ax = b using precomputed factors + +For optimal performance accross backends b should be a column-major (F Contiguous) +array or vector. + +The intended interface for this module is: + +xo.sparse.factorized_sparse_solver() + +The function includes detailed documentation for usage, but in short, it returns +the best performing solver based on the context and available modules. If the +context is not explicitly defined, it is inferred based on the input matrix. + +This is how modules that build upon this functionality within xsuite should interact +with the xo.sparse module, so that cross-platform compatibility is guaranteed. + +For development and convenience purposes xo.sparse provides the: +xo.sparse.solvers module + +which provides the following options: +xo.sparse.solvers.CPU. + - scipysplu : Alias for scipy SuperLU + - KLUSuperLU : Alias for PyKLU +xo.sparse.solvers.CPU. + - cuDSS : nvmath.sparse.advanced.DirectSolver Wrapper with a SuperLU-like interface + - cachedSpSM : Rewrite of cupy's SuperLU to cache the SpSM analysis step + offering massive speedups compared to cupy splu when the only + available backend is SpSM + - cupysplu : Alias for scipy SuperLU +''' + +# Example: solve small matrix system +n = 5 +# Create matrix +main = 2.0 + np.abs(np.random.standard_normal(n)) +lower = np.random.standard_normal(n-1) +upper = np.random.standard_normal(n-1) +A = sp.diags( + diagonals=[lower, main, upper], + offsets=[-1, 0, 1], + format="csc" +) + +# Create solver: +print("Solver selection process: ") +solver = xo.sparse.factorized_sparse_solver(A, verbose = True) + +# Generate random vector to solve: +b = np.random.standard_normal(n) + +# Solve system +x = solver.solve(b) + +# Calculate relative residual to assess solver: +res = rel_residual(A,x,b) +print("Relative residual of solution ", res) +print("Residual should be small (<10^-12)") From af1f74921b8d49d6ae483677f04fb94b898bc7df Mon Sep 17 00:00:00 2001 From: Evangelos Katralis Date: Tue, 2 Dec 2025 13:38:38 +0100 Subject: [PATCH 25/28] Fix file formatting --- examples/solve_sparse_system.py | 9 +++++++-- tests/test_sparse_solvers.py | 5 +++++ xobjects/sparse/_sparse.py | 5 +++++ xobjects/sparse/_test_helpers.py | 5 +++++ xobjects/sparse/solvers/CUDA/_cuDSSLU.py | 5 +++++ xobjects/sparse/solvers/CUDA/_luLU.py | 5 +++++ xobjects/sparse/solvers/_abstract_solver.py | 5 +++++ 7 files changed, 37 insertions(+), 2 deletions(-) diff --git a/examples/solve_sparse_system.py b/examples/solve_sparse_system.py index 4e02673..c9a17cb 100644 --- a/examples/solve_sparse_system.py +++ b/examples/solve_sparse_system.py @@ -1,3 +1,8 @@ +# copyright ################################# # +# This file is part of the Xobjects Package. # +# Copyright (c) CERN, 2021. # +# ########################################### # + import xobjects as xo import scipy.sparse as sp import numpy as np @@ -17,7 +22,7 @@ solver(A) # Performs decomposition/factorization solver.solve(b) # Solves Ax = b using precomputed factors -For optimal performance accross backends b should be a column-major (F Contiguous) +For optimal performance across backends b should be a column-major (F Contiguous) array or vector. The intended interface for this module is: @@ -34,7 +39,7 @@ For development and convenience purposes xo.sparse provides the: xo.sparse.solvers module -which provides the following options: +which provides the following: xo.sparse.solvers.CPU. - scipysplu : Alias for scipy SuperLU - KLUSuperLU : Alias for PyKLU diff --git a/tests/test_sparse_solvers.py b/tests/test_sparse_solvers.py index e490335..8fd77e8 100644 --- a/tests/test_sparse_solvers.py +++ b/tests/test_sparse_solvers.py @@ -1,3 +1,8 @@ +# copyright ################################# # +# This file is part of the Xobjects Package. # +# Copyright (c) CERN, 2021. # +# ########################################### # + import numpy as np import scipy.sparse as sp import xobjects as xo diff --git a/xobjects/sparse/_sparse.py b/xobjects/sparse/_sparse.py index 0d7301c..9091970 100644 --- a/xobjects/sparse/_sparse.py +++ b/xobjects/sparse/_sparse.py @@ -1,3 +1,8 @@ +# copyright ################################# # +# This file is part of the Xobjects Package. # +# Copyright (c) CERN, 2021. # +# ########################################### # + import scipy.sparse from numpy import ndarray as nparray from typing import Optional, Literal, Union diff --git a/xobjects/sparse/_test_helpers.py b/xobjects/sparse/_test_helpers.py index 313aa39..6b06875 100644 --- a/xobjects/sparse/_test_helpers.py +++ b/xobjects/sparse/_test_helpers.py @@ -1,3 +1,8 @@ +# copyright ################################# # +# This file is part of the Xobjects Package. # +# Copyright (c) CERN, 2021. # +# ########################################### # + import numpy.linalg as npl import scipy.sparse.linalg as scspl from scipy.sparse import issparse diff --git a/xobjects/sparse/solvers/CUDA/_cuDSSLU.py b/xobjects/sparse/solvers/CUDA/_cuDSSLU.py index 3ca7bfa..31ded5e 100644 --- a/xobjects/sparse/solvers/CUDA/_cuDSSLU.py +++ b/xobjects/sparse/solvers/CUDA/_cuDSSLU.py @@ -1,3 +1,8 @@ +# copyright ################################# # +# This file is part of the Xobjects Package. # +# Copyright (c) CERN, 2021. # +# ########################################### # + import cupy as cp import cupyx.scipy.sparse as sp import nvmath diff --git a/xobjects/sparse/solvers/CUDA/_luLU.py b/xobjects/sparse/solvers/CUDA/_luLU.py index 4089ace..25665ec 100644 --- a/xobjects/sparse/solvers/CUDA/_luLU.py +++ b/xobjects/sparse/solvers/CUDA/_luLU.py @@ -1,3 +1,8 @@ +# copyright ################################# # +# This file is part of the Xobjects Package. # +# Copyright (c) CERN, 2021. # +# ########################################### # + import cupy as _cupy import numpy as _numpy import cupyx.scipy.sparse diff --git a/xobjects/sparse/solvers/_abstract_solver.py b/xobjects/sparse/solvers/_abstract_solver.py index 8a6022c..9acfe10 100644 --- a/xobjects/sparse/solvers/_abstract_solver.py +++ b/xobjects/sparse/solvers/_abstract_solver.py @@ -1,3 +1,8 @@ +# copyright ################################# # +# This file is part of the Xobjects Package. # +# Copyright (c) CERN, 2021. # +# ########################################### # + from abc import ABC, abstractmethod class SuperLUlikeSolver(ABC): From 9f392e7887ea797b396d10f750007367b9e61887 Mon Sep 17 00:00:00 2001 From: Evangelos Katralis Date: Tue, 2 Dec 2025 14:17:24 +0100 Subject: [PATCH 26/28] Refactor test functions --- tests/test_sparse_solvers.py | 56 +++++++++++++++++++++++------ xobjects/sparse/__init__.py | 4 +-- xobjects/sparse/_sparse.py | 14 +++++++- xobjects/sparse/_test_helpers.py | 61 -------------------------------- 4 files changed, 60 insertions(+), 75 deletions(-) delete mode 100644 xobjects/sparse/_test_helpers.py diff --git a/tests/test_sparse_solvers.py b/tests/test_sparse_solvers.py index 8fd77e8..eb43633 100644 --- a/tests/test_sparse_solvers.py +++ b/tests/test_sparse_solvers.py @@ -7,7 +7,6 @@ import scipy.sparse as sp import xobjects as xo from xobjects.test_helpers import fix_random_seed -from xobjects.sparse import _test_helpers as sptest from xobjects.context import ModuleNotAvailableError import warnings import pytest @@ -36,6 +35,41 @@ but if testing larger systems, could potentially be omitted. ''' +# ---- Helper functions ---- +def issymmetric(A, tol=0): + if A.shape[0] != A.shape[1]: + return False + diff = A - A.T + if tol == 0: + return diff.nnz == 0 + else: + # tolerance-based check + return abs(diff).max() <= tol + + +def assert_residual_ok(res_ref, res_solver, + abs_tol=1e-12, + factor=10): + """ + Check that our solver's residual is both: + - absolutely small enough (abs_tol), + - not catastrophically worse than the reference (factor * res_ref). + """ + # sanity: reference solver itself should be good + assert res_ref < abs_tol, f"Reference residual too large: {res_ref}" + + # absolute bound + assert res_solver < abs_tol, ( + f"Residual {res_solver} exceeds absolute tolerance {abs_tol}" + ) + + # relative bound vs reference + assert res_solver <= factor * res_ref, ( + f"Residual {res_solver} not within factor {factor} of " + f"reference residual {res_ref}" + ) + +# ---- Tests ---- cpu_tests = [ ("scipySLU", xo.ContextCpu()), ("PyKLU", xo.ContextCpu()), @@ -118,9 +152,9 @@ def make_tridiagonal_system(n, nbatches): @pytest.mark.parametrize("sparse_system", [random_system, tridiag_system]) def test_vector_solve(test_solver, test_context, sparse_system): A_sp, b_sp, x_sp, _ = sparse_system - assert not sptest.issymmetric(A_sp) + assert not issymmetric(A_sp) - scipy_residual = sptest.rel_residual(A_sp,x_sp,b_sp) + scipy_residual = xo.sparse.rel_residual(A_sp,x_sp,b_sp) if "Cpu" in str(test_context): A = test_context.splike_lib.sparse.csc_matrix(A_sp) @@ -134,9 +168,9 @@ def test_vector_solve(test_solver, test_context, sparse_system): ) x = solver.solve(b) - solver_residual = sptest.rel_residual(A,x,b) - sptest.assert_residual_ok(scipy_residual,solver_residual, - abs_tol = ABS_TOL, factor = TOLERANCE_FACTOR) + solver_residual = xo.sparse.rel_residual(A,x,b) + assert_residual_ok(scipy_residual,solver_residual, + abs_tol = ABS_TOL, factor = TOLERANCE_FACTOR) random_system = make_random_sparse_system(SPARSE_SYSTEM_SIZE, NUM_BATCHES) tridiag_system = make_tridiagonal_system(SPARSE_SYSTEM_SIZE, NUM_BATCHES) @@ -145,8 +179,8 @@ def test_vector_solve(test_solver, test_context, sparse_system): @pytest.mark.parametrize("sparse_system", [random_system, tridiag_system]) def test_batched_solve(test_solver, test_context, sparse_system): A_sp, b_sp, x_sp, _ = sparse_system - assert not sptest.issymmetric(A_sp) - scipy_residual = sptest.rel_residual(A_sp,x_sp,b_sp) + assert not issymmetric(A_sp) + scipy_residual = xo.sparse.rel_residual(A_sp,x_sp,b_sp) if "Cpu" in str(test_context): A = test_context.splike_lib.sparse.csc_matrix(A_sp) if "Cupy" in str(test_context): @@ -160,6 +194,6 @@ def test_batched_solve(test_solver, test_context, sparse_system): ) x = solver.solve(b) - solver_residual = sptest.rel_residual(A,x,b) - sptest.assert_residual_ok(scipy_residual,solver_residual, - abs_tol = ABS_TOL, factor = TOLERANCE_FACTOR) \ No newline at end of file + solver_residual = xo.sparse.rel_residual(A,x,b) + assert_residual_ok(scipy_residual,solver_residual, + abs_tol = ABS_TOL, factor = TOLERANCE_FACTOR) \ No newline at end of file diff --git a/xobjects/sparse/__init__.py b/xobjects/sparse/__init__.py index 2fa42ef..9a4cca2 100644 --- a/xobjects/sparse/__init__.py +++ b/xobjects/sparse/__init__.py @@ -1,3 +1,3 @@ -from ._sparse import factorized_sparse_solver +from ._sparse import factorized_sparse_solver, rel_residual from . import solvers -__all__ = ["factorized_sparse_solver","solvers"] +__all__ = ["factorized_sparse_solver","solvers", "rel_residual"] diff --git a/xobjects/sparse/_sparse.py b/xobjects/sparse/_sparse.py index 9091970..80f80f6 100644 --- a/xobjects/sparse/_sparse.py +++ b/xobjects/sparse/_sparse.py @@ -4,6 +4,7 @@ # ########################################### # import scipy.sparse +import numpy.linalg as npl from numpy import ndarray as nparray from typing import Optional, Literal, Union from ..context import XContext @@ -330,4 +331,15 @@ def factorized_sparse_solver(A: Union[scipy.sparse.csr_matrix, def dbugprint(verbose: bool, text: str): if verbose: - _print("[xo.sparse] "+text) \ No newline at end of file + _print("[xo.sparse] "+text) + +def rel_residual(A,x,b): + if hasattr(A, "get"): + A = A.get() + if hasattr(x, "get"): + x = x.get() + if hasattr(b, "get"): + b = b.get() + assert scipy.sparse.issparse(A), ("A must be a sparse matrix") + + return npl.norm(A@x - b) / (npl.norm(b)) \ No newline at end of file diff --git a/xobjects/sparse/_test_helpers.py b/xobjects/sparse/_test_helpers.py deleted file mode 100644 index 6b06875..0000000 --- a/xobjects/sparse/_test_helpers.py +++ /dev/null @@ -1,61 +0,0 @@ -# copyright ################################# # -# This file is part of the Xobjects Package. # -# Copyright (c) CERN, 2021. # -# ########################################### # - -import numpy.linalg as npl -import scipy.sparse.linalg as scspl -from scipy.sparse import issparse - -def issymmetric(A, tol=0): - if A.shape[0] != A.shape[1]: - return False - diff = A - A.T - if tol == 0: - return diff.nnz == 0 - else: - # tolerance-based check - return abs(diff).max() <= tol - -def rel_residual(A,x,b): - if hasattr(A, "get"): - A = A.get() - if hasattr(x, "get"): - x = x.get() - if hasattr(b, "get"): - b = b.get() - assert issparse(A), ("A must be a sparse matrix") - - return npl.norm(A@x - b) / (npl.norm(b)) - -def assert_residual_ok(res_ref, res_solver, - abs_tol=1e-12, - factor=10): - """ - Check that our solver's residual is both: - - absolutely small enough (abs_tol), - - not catastrophically worse than the reference (factor * res_ref). - """ - # sanity: reference solver itself should be good - assert res_ref < abs_tol, f"Reference residual too large: {res_ref}" - - # absolute bound - assert res_solver < abs_tol, ( - f"Residual {res_solver} exceeds absolute tolerance {abs_tol}" - ) - - # relative bound vs reference - assert res_solver <= factor * res_ref, ( - f"Residual {res_solver} not within factor {factor} of " - f"reference residual {res_ref}" - ) - -# ---- Unused rn, but could be useful for tests ---- - -def assert_close_to_precision(value, precision): - assert value <= precision, (f"Value {value} not within precision {precision}") - -def assert_residual_close(reference_residual, residual, tolerance = 10): - assert residual <= tolerance*reference_residual, ( - f"Residual {residual} not within tolerance " - f"O({tolerance}) of reference residual {reference_residual}") \ No newline at end of file From 8727a3e73bae8b25bc8c64bdaf565d48cdcdddae Mon Sep 17 00:00:00 2001 From: Evangelos Katralis Date: Thu, 4 Dec 2025 13:02:55 +0100 Subject: [PATCH 27/28] Add optional dependencies for sparse in pyproject.toml --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index d64975b..f0bd6ae 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,6 +28,8 @@ Documentation = "https://xsuite.readthedocs.io/" [project.optional-dependencies] tests = ["pytest", "pytest-mock"] +sparse = ["pyklu"] +sparseGPU = ["nvmath-python"] [tool.setuptools.packages.find] where = ["."] From 11813578a2851f1f579bf88e4461f873a3c1dc07 Mon Sep 17 00:00:00 2001 From: Evangelos Katralis Date: Fri, 5 Dec 2025 14:47:37 +0100 Subject: [PATCH 28/28] Fix example --- examples/solve_sparse_system.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/solve_sparse_system.py b/examples/solve_sparse_system.py index c9a17cb..57c346a 100644 --- a/examples/solve_sparse_system.py +++ b/examples/solve_sparse_system.py @@ -6,7 +6,7 @@ import xobjects as xo import scipy.sparse as sp import numpy as np -from xobjects.sparse._test_helpers import rel_residual +from xobjects.sparse import rel_residual ''' The goal of this example is to provide a short user guide for the xo.sparse module.