From 48213b412d06e8bd3899fc7d5b2cd05c9a29a9ea Mon Sep 17 00:00:00 2001 From: "todd.a.anderson" Date: Tue, 18 Aug 2020 16:31:31 -0500 Subject: [PATCH 01/23] Initial support for Numpy ndarray subclasses. --- numba/_typeof.c | 13 ++++++++++++- numba/core/extending.py | 2 +- numba/core/ir_utils.py | 12 ++++++++---- numba/core/pythonapi.py | 9 ++++++++- numba/core/runtime/_nrt_python.c | 4 ++-- numba/core/types/npytypes.py | 4 +++- numba/core/typing/npydecl.py | 18 ++++++++++++++++-- numba/np/arrayobj.py | 11 ++++++++--- 8 files changed, 58 insertions(+), 15 deletions(-) diff --git a/numba/_typeof.c b/numba/_typeof.c index 242150cccd0..2fd274f832f 100644 --- a/numba/_typeof.c +++ b/numba/_typeof.c @@ -767,6 +767,7 @@ int typeof_typecode(PyObject *dispatcher, PyObject *val) { PyTypeObject *tyobj = Py_TYPE(val); + int no_subtype_attr; /* This needs to be kept in sync with Dispatcher.typeof_pyval(), * otherwise funny things may happen. */ @@ -793,9 +794,19 @@ typeof_typecode(PyObject *dispatcher, PyObject *val) return typecode_arrayscalar(dispatcher, val); } /* Array handling */ - else if (PyType_IsSubtype(tyobj, &PyArray_Type)) { + else if (tyobj == &PyArray_Type) { return typecode_ndarray(dispatcher, (PyArrayObject*)val); } + /* Subtypes of Array handling */ + else if (PyType_IsSubtype(tyobj, &PyArray_Type)) { + /* If the class has an attribute named __numba_no_subtype_ndarray then + don't treat it as a normal variant of a Numpy ndarray but as it's own + separate type. 
*/ + no_subtype_attr = PyObject_HasAttrString(val, "__numba_no_subtype_ndarray__"); + if (!no_subtype_attr) { + return typecode_ndarray(dispatcher, (PyArrayObject*)val); + } + } return typecode_using_fingerprint(dispatcher, val); } diff --git a/numba/core/extending.py b/numba/core/extending.py index 71fe22829b8..e5bbf2738c5 100644 --- a/numba/core/extending.py +++ b/numba/core/extending.py @@ -14,7 +14,7 @@ lower_setattr, lower_setattr_generic, lower_cast) # noqa: F401 from numba.core.datamodel import models # noqa: F401 from numba.core.datamodel import register_default as register_model # noqa: F401, E501 -from numba.core.pythonapi import box, unbox, reflect, NativeValue # noqa: F401 +from numba.core.pythonapi import box, unbox, reflect, NativeValue, allocator # noqa: F401 from numba._helperlib import _import_cython_function # noqa: F401 diff --git a/numba/core/ir_utils.py b/numba/core/ir_utils.py index 1c5e58b8d21..21bc98d6d47 100644 --- a/numba/core/ir_utils.py +++ b/numba/core/ir_utils.py @@ -64,6 +64,8 @@ def mk_alloc(typemap, calltypes, lhs, size_var, dtype, scope, loc): out = [] ndims = 1 size_typ = types.intp + # Get the type of the array being allocated. + arr_typ = typemap[lhs.name] if isinstance(size_var, tuple): if len(size_var) == 1: size_var = size_var[0] @@ -108,11 +110,13 @@ def mk_alloc(typemap, calltypes, lhs, size_var, dtype, scope, loc): typ_var_assign = ir.Assign(np_typ_getattr, typ_var, loc) alloc_call = ir.Expr.call(attr_var, [size_var, typ_var], (), loc) if calltypes: - calltypes[alloc_call] = typemap[attr_var.name].get_call_type( + cac = typemap[attr_var.name].get_call_type( typing.Context(), [size_typ, types.functions.NumberClass(dtype)], {}) - # signature( - # types.npytypes.Array(dtype, ndims, 'C'), size_typ, - # types.functions.NumberClass(dtype)) + # By default, all calls to "empty" are typed as returning a standard + # Numpy ndarray. 
If we are allocating a ndarray subclass here then + # just change the return type to be that of the subclass. + cac._return_type = arr_typ + calltypes[alloc_call] = cac alloc_assign = ir.Assign(alloc_call, lhs, loc) out.extend([g_np_assign, attr_assign, typ_var_assign, alloc_assign]) diff --git a/numba/core/pythonapi.py b/numba/core/pythonapi.py index fd5e39b37e6..10499bb3350 100644 --- a/numba/core/pythonapi.py +++ b/numba/core/pythonapi.py @@ -43,10 +43,13 @@ def lookup(self, typeclass, default=None): _boxers = _Registry() _unboxers = _Registry() _reflectors = _Registry() +# Registry of special allocators for types. +_allocators = _Registry() box = _boxers.register unbox = _unboxers.register reflect = _reflectors.register +allocator = _allocators.register class _BoxContext(namedtuple("_BoxContext", ("context", "builder", "pyapi", "env_manager"))): @@ -1163,8 +1166,11 @@ def nrt_adapt_ndarray_to_python(self, aryty, ary, dtypeptr): assert self.context.enable_nrt, "NRT required" intty = ir.IntType(32) + # Embed the Python type of the array (maybe subclass) in the LLVM. 
+ serial_aryty_pytype = self.unserialize(self.serialize_object(aryty.PyType)) + fnty = Type.function(self.pyobj, - [self.voidptr, intty, intty, self.pyobj]) + [self.voidptr, self.pyobj, intty, intty, self.pyobj]) fn = self._get_function(fnty, name="NRT_adapt_ndarray_to_python") fn.args[0].add_attribute(lc.ATTR_NO_CAPTURE) @@ -1174,6 +1180,7 @@ def nrt_adapt_ndarray_to_python(self, aryty, ary, dtypeptr): aryptr = cgutils.alloca_once_value(self.builder, ary) return self.builder.call(fn, [self.builder.bitcast(aryptr, self.voidptr), + serial_aryty_pytype, ndim, writable, dtypeptr]) def nrt_meminfo_new_from_pyobject(self, data, pyobj): diff --git a/numba/core/runtime/_nrt_python.c b/numba/core/runtime/_nrt_python.c index 97f7f19efb3..8a891518571 100644 --- a/numba/core/runtime/_nrt_python.c +++ b/numba/core/runtime/_nrt_python.c @@ -285,7 +285,7 @@ PyObject* try_to_return_parent(arystruct_t *arystruct, int ndim, } NUMBA_EXPORT_FUNC(PyObject *) -NRT_adapt_ndarray_to_python(arystruct_t* arystruct, int ndim, +NRT_adapt_ndarray_to_python(arystruct_t* arystruct, PyTypeObject *retty, int ndim, int writeable, PyArray_Descr *descr) { PyArrayObject *array; @@ -329,7 +329,7 @@ NRT_adapt_ndarray_to_python(arystruct_t* arystruct, int ndim, shape = arystruct->shape_and_strides; strides = shape + ndim; Py_INCREF((PyObject *) descr); - array = (PyArrayObject *) PyArray_NewFromDescr(&PyArray_Type, descr, ndim, + array = (PyArrayObject *) PyArray_NewFromDescr(retty, descr, ndim, shape, strides, arystruct->data, flags, (PyObject *) miobj); diff --git a/numba/core/types/npytypes.py b/numba/core/types/npytypes.py index 6f6307c5526..d3b5bb7d3b9 100644 --- a/numba/core/types/npytypes.py +++ b/numba/core/types/npytypes.py @@ -8,6 +8,7 @@ from numba.core import utils from .misc import UnicodeType from .containers import Bytes +import numpy as np class CharSeq(Type): """ @@ -394,8 +395,9 @@ class Array(Buffer): Type class for Numpy arrays. 
""" - def __init__(self, dtype, ndim, layout, readonly=False, name=None, + def __init__(self, dtype, ndim, layout, PyType=np.ndarray, readonly=False, name=None, aligned=True, addrspace=None): + self.PyType = PyType if readonly: self.mutable = False if (not aligned or diff --git a/numba/core/typing/npydecl.py b/numba/core/typing/npydecl.py index 7e11e1dfa61..9ce15d1a5ff 100644 --- a/numba/core/typing/npydecl.py +++ b/numba/core/typing/npydecl.py @@ -130,7 +130,21 @@ def generic(self, args, kws): ret_tys = ufunc_loop.outputs[-implicit_output_count:] if ndims > 0: assert layout is not None - ret_tys = [types.Array(dtype=ret_ty, ndim=ndims, layout=layout) + # If either of the types involved in the ufunc operation have a + # __array_ufunc__ method then invoke the first such one to + # determine the output type of the ufunc. + array_ufunc_type = None + for a in args: + if hasattr(a, "__array_ufunc__"): + array_ufunc_type = a + break + output_type = types.Array + if array_ufunc_type is not None: + output_type = array_ufunc_type.__array_ufunc__(ufunc, "__call__", *args, **kws) + # Eventually better error handling! FIX ME! + assert(output_type is not None) + + ret_tys = [output_type(dtype=ret_ty, ndim=ndims, layout=layout) for ret_ty in ret_tys] ret_tys = [resolve_output_type(self.context, args, ret_ty) for ret_ty in ret_tys] @@ -138,7 +152,7 @@ def generic(self, args, kws): # note: although the previous code should support multiple return values, only one # is supported as of now (signature may not support more than one). 
- # there is an check enforcing only one output + # there is a check enforcing only one output out.extend(args) return signature(*out) diff --git a/numba/np/arrayobj.py b/numba/np/arrayobj.py index 933b1c6565e..5749e7d9b5b 100644 --- a/numba/np/arrayobj.py +++ b/numba/np/arrayobj.py @@ -32,7 +32,7 @@ from numba.misc import quicksort, mergesort from numba.cpython import slicing from numba.cpython.unsafe.tuple import tuple_setitem - +from numba.core.pythonapi import _allocators def set_range_metadata(builder, load, lower_bound, upper_bound): """ @@ -3399,8 +3399,13 @@ def _empty_nd_impl(context, builder, arrtype, shapes): ) align = context.get_preferred_array_alignment(arrtype.dtype) - meminfo = context.nrt.meminfo_alloc_aligned(builder, size=allocsize, - align=align) + def alloc_unsupported(context, builder, size, align): + return context.nrt.meminfo_alloc_aligned(builder, size, align) + + # See if the type has a special allocator, if not use the default + # alloc_unsupported allocator above. + allocator_impl = _allocators.lookup(arrtype.__class__, alloc_unsupported) + meminfo = allocator_impl(context, builder, size=allocsize, align=align) data = context.nrt.meminfo_data(builder, meminfo) From 3a897997c170c106f911afba640317d8d94cf69b Mon Sep 17 00:00:00 2001 From: "Todd A. Anderson" Date: Mon, 24 Aug 2020 18:29:43 -0500 Subject: [PATCH 02/23] PyType to py_type --- numba/core/pythonapi.py | 2 +- numba/core/types/npytypes.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/numba/core/pythonapi.py b/numba/core/pythonapi.py index 10499bb3350..2223a179a81 100644 --- a/numba/core/pythonapi.py +++ b/numba/core/pythonapi.py @@ -1167,7 +1167,7 @@ def nrt_adapt_ndarray_to_python(self, aryty, ary, dtypeptr): intty = ir.IntType(32) # Embed the Python type of the array (maybe subclass) in the LLVM.
- serial_aryty_pytype = self.unserialize(self.serialize_object(aryty.PyType)) + serial_aryty_pytype = self.unserialize(self.serialize_object(aryty.py_type)) fnty = Type.function(self.pyobj, [self.voidptr, self.pyobj, intty, intty, self.pyobj]) diff --git a/numba/core/types/npytypes.py b/numba/core/types/npytypes.py index d3b5bb7d3b9..3c2191ca23e 100644 --- a/numba/core/types/npytypes.py +++ b/numba/core/types/npytypes.py @@ -395,9 +395,9 @@ class Array(Buffer): Type class for Numpy arrays. """ - def __init__(self, dtype, ndim, layout, PyType=np.ndarray, readonly=False, name=None, + def __init__(self, dtype, ndim, layout, py_type=np.ndarray, readonly=False, name=None, aligned=True, addrspace=None): - self.PyType = PyType + self.py_type = py_type if readonly: self.mutable = False if (not aligned or From 46a4b6f3c4b2f93d1292f2f9f9e67cdf0ca2b00c Mon Sep 17 00:00:00 2001 From: "Todd A. Anderson" Date: Sat, 19 Sep 2020 23:32:15 -0500 Subject: [PATCH 03/23] Implementation of USM-backed ndarray as dparray. 
--- numba/dppl/dparray.py | 293 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 293 insertions(+) create mode 100644 numba/dppl/dparray.py diff --git a/numba/dppl/dparray.py b/numba/dppl/dparray.py new file mode 100644 index 00000000000..cd6652c2fc4 --- /dev/null +++ b/numba/dppl/dparray.py @@ -0,0 +1,293 @@ +#from ._ndarray_utils import _transmogrify +import numpy as np +from inspect import getmembers, isfunction, isclass +from numbers import Number +import numba +from numba import types +from numba.extending import typeof_impl, register_model, type_callable, lower_builtin +from numba.np import numpy_support +from numba.core.pythonapi import box, allocator +from llvmlite import ir +import llvmlite.binding as llb +from numba.core import types, cgutils +import builtins +import sys +from ctypes.util import find_library +import dppl +from dppl._memory import MemoryUSMShared + +flib = find_library('mkl_intel_ilp64') +print("flib:", flib) +llb.load_library_permanently(flib) + +functions_list = [o for o in getmembers(np) if isfunction(o[1])] +class_list = [o for o in getmembers(np) if isclass(o[1])] + +class ndarray(np.ndarray): + """ + numpy.ndarray subclass whose underlying memory buffer is allocated + with a foreign allocator. + """ + def __new__(subtype, shape, + dtype=float, buffer=None, offset=0, + strides=None, order=None): + # Create a new array. 
+ if buffer is None: + nelems = np.prod(shape) + dt = np.dtype(dtype) + isz = dt.itemsize + buf = MemoryUSMShared(nbytes=isz*max(1,nelems)) + return np.ndarray.__new__( + subtype, shape, dtype=dt, + buffer=buf, offset=0, + strides=strides, order=order) + # zero copy if buffer is a usm backed array-like thing + elif hasattr(buffer, '__sycl_usm_array_interface__'): + # also check for array interface + return np.ndarray.__new__( + subtype, shape, dtype=dtype, + buffer=buffer, offset=offset, + strides=strides, order=order) + else: + # must copy + ar = np.ndarray(shape, + dtype=dtype, buffer=buffer, + offset=offset, strides=strides, + order=order) + buf = MemoryUSMShared(nbytes=ar.nbytes) + res = np.ndarray.__new__( + subtype, shape, dtype=dtype, + buffer=buf, offset=0, + strides=strides, order=order) + np.copyto(res, ar, casting='no') + return res + + def __array_finalize__(self, obj): + # When called from the explicit constructor, obj is None + if obj is None: return + # When called in new-from-template, `obj` is another instance of our own + # subclass, that we might use to update the new `self` instance. + # However, when called from view casting, `obj` can be an instance of any + # subclass of ndarray, including our own. + if hasattr(obj, '__sycl_usm_array_interface__'): + return + if isinstance(obj, np.ndarray): + ob = self + while isinstance(ob, np.ndarray): + if hasattr(ob, '__sycl_usm_array_interface__'): + return + ob = ob.base + + # trace if self has underlying mkl_mem buffer +# ob = self.base + +# while isinstance(ob, ndarray): +# ob = ob.base +# if isinstance(ob, dppl.Memory): +# return + + # Just raise an exception since __array_ufunc__ makes all reasonable cases not + # need the code below.
+ raise ValueError("Non-MKL allocated ndarray can not viewed as MKL-allocated one without a copy") + + """ + # since dparray must have mkl_memory underlying it, a copy must be made + newbuf = dppl.Memory(nbytes=self.data.nbytes) + new_arr = np.ndarray.__new__( + type(self), + self.shape, + buffer=newbuf, offset=0, + dtype=self.dtype, + strides=self.strides) + np.copyto(new_arr, self) + # We need to modify self to now be mkl_memory-backed ndarray + # We only need to change data and base, but these are not writeable. + # + # Modification can not be done by simply setting self either, + # as self is just a local copy of the instance. + # + # raise ValueError("Non-MKL allocated ndarray can not viewed as MKL-allocated one without a copy") + # Will probably have to raise an exception soon as Numpy may disallow this. + _transmogrify(self, new_arr) + """ + + __numba_no_subtype_ndarray__ = True + + def from_ndarray(x): + return ndarray(x.shape, x.dtype, x) + + def as_ndarray(self): + return np.ndarray(self.shape, self.dtype, self) + + def __array__(self): + return self + + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + if method == '__call__': + N = None + scalars = [] + for inp in inputs: + if isinstance(inp, Number): + scalars.append(inp) + elif isinstance(inp, (self.__class__, np.ndarray)): + if isinstance(inp, self.__class__): + scalars.append(np.ndarray(inp.shape, inp.dtype, inp)) + else: + scalars.append(inp) + if N is not None: + if N != inp.shape: + raise TypeError("inconsistent sizes") + else: + N = inp.shape + else: + return NotImplemented + if kwargs.get('out', None) is None: + # maybe copy? 
+ # deal with multiple returned arrays, so kwargs['out'] can be tuple + kwargs['out'] = empty(inputs[0].shape, dtype=get_ret_type_from_ufunc(ufunc)) + ret = ufunc(*scalars, **kwargs) + return ret +# return self.__class__(ret.shape, ret.dtype, ret) + else: + return NotImplemented + +for c in class_list: + cname = c[0] + new_func = "class %s(np.%s):\n" % (cname, cname) + if cname == "ndarray": + # Implemented explicitly above. + continue + else: + # This is temporary. + new_func += " pass\n" + # The code below should eventually be made to work and used. +# new_func += " @classmethod\n" +# new_func += " def cast(cls, some_np_obj):\n" +# new_func += " some_np_obj.__class__ = cls\n" +# new_func += " return some_np_obj\n" + try: + the_code = compile(new_func, '__init__', 'exec') + exec(the_code) + except: + pass + +# Redefine all Numpy functions in this module and if they +# return a Numpy array, transform that to a USM-backed array +# instead. This is a stop-gap. We should eventually find a +# way to do the allocation correct to start with. +for f in functions_list: + fname = f[0] + new_func = "def %s(*args, **kwargs):\n" % fname + new_func += " ret = np.%s(*args, **kwargs)\n" % fname + new_func += " if type(ret) == np.ndarray:\n" + new_func += " ret = ndarray(ret.shape, ret.dtype, ret)\n" + new_func += " return ret\n" + the_code = compile(new_func, '__init__', 'exec') + exec(the_code) + +# This class creates a type in Numba. +class DPArrayType(types.Array): + def __init__(self, dtype, ndim, layout, readonly=False, name=None, + aligned=True, addrspace=None): + # This name defines how this type will be shown in Numba's type dumps. + name = "DPArray:ndarray(%s, %sd, %s)" % (dtype, ndim, layout) + super(DPArrayType, self).__init__(dtype, ndim, layout, + py_type=ndarray, + readonly=readonly, + name=name, + addrspace=addrspace) + + # Tell Numba typing how to combine DPArrayType with other ndarray types. 
+ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + if method == '__call__': + for inp in inputs: + if not isinstance(inp, (DPArrayType, types.Array, types.Number)): + return None + + return DPArrayType + else: + return None + +# This tells Numba how to create a DPArrayType when a dparray is passed +# into a njit function. +@typeof_impl.register(ndarray) +def typeof_ta_ndarray(val, c): + try: + dtype = numpy_support.from_dtype(val.dtype) + except NotImplementedError: + raise ValueError("Unsupported array dtype: %s" % (val.dtype,)) + layout = numpy_support.map_layout(val) + readonly = not val.flags.writeable + return DPArrayType(dtype, val.ndim, layout, readonly=readonly) + +# This tells Numba to use the default Numpy ndarray data layout for +# object of type DPArray. +register_model(DPArrayType)(numba.core.datamodel.models.ArrayModel) + +# This tells Numba how to type calls to a DPArray constructor. +@type_callable(ndarray) +def type_ndarray(context): + def typer(shape, ndim, buf): + return DPArrayType(buf.dtype, buf.ndim, buf.layout) + return typer + +# This tells Numba how to implement calls to a DPArray constructor. +@lower_builtin(ndarray, types.UniTuple, types.DType, types.Array) +def impl_ndarray(context, builder, sig, args): + # Need to allocate and copy here! + shape, ndim, buf = args + return buf + +# This tells Numba how to convert from its native representation +# of a DPArray in a njit function back to a Python DPArray. 
+@box(DPArrayType) +def box_array(typ, val, c): + nativearycls = c.context.make_array(typ) + nativeary = nativearycls(c.context, c.builder, value=val) + if c.context.enable_nrt: + np_dtype = numpy_support.as_dtype(typ.dtype) + dtypeptr = c.env_manager.read_const(c.env_manager.add_const(np_dtype)) + # Steals NRT ref + newary = c.pyapi.nrt_adapt_ndarray_to_python(typ, val, dtypeptr) + return newary + else: + parent = nativeary.parent + c.pyapi.incref(parent) + return parent + +# This tells Numba to use this function when it needs to allocate a +# DPArray in a njit function. +@allocator(DPArrayType) +def allocator_DPArray(context, builder, size, align): + print("allocator_DPArray") + sys.stdout.flush() + use_Numba_allocator = True + if use_Numba_allocator: + print("Using Numba allocator") + context.nrt._require_nrt() + + mod = builder.module + u32 = ir.IntType(32) + fnty = ir.FunctionType(cgutils.voidptr_t, [cgutils.intp_t, u32]) + fn = mod.get_or_insert_function(fnty, + name="NRT_MemInfo_alloc_safe_aligned") + fn.return_value.add_attribute("noalias") + if isinstance(align, builtins.int): + align = context.get_constant(types.uint32, align) + else: + assert align.type == u32, "align must be a uint32" + return builder.call(fn, [size, align]) + else: + print("Using mkl_malloc") + context.nrt._require_nrt() + + mod = builder.module + u32 = ir.IntType(32) + fnty = ir.FunctionType(cgutils.voidptr_t, [cgutils.intp_t, u32]) + fn = mod.get_or_insert_function(fnty, name="mkl_malloc") + fn.return_value.add_attribute("noalias") + if isinstance(align, builtins.int): + align = context.get_constant(types.uint32, align) + else: + assert align.type == u32, "align must be a uint32" + return builder.call(fn, [size, align]) From e12ea86989d533a4f8b422b92d9d39aa8f3deaea Mon Sep 17 00:00:00 2001 From: "Todd A. Anderson" Date: Sat, 19 Sep 2020 23:32:42 -0500 Subject: [PATCH 04/23] Initial implementation of custom allocator support. 
--- numba/core/runtime/nrt.c | 77 +++++++++++++++++++++++-------- numba/core/runtime/nrt.h | 11 +++-- numba/core/runtime/nrt_external.h | 16 +++++++ 3 files changed, 81 insertions(+), 23 deletions(-) diff --git a/numba/core/runtime/nrt.c b/numba/core/runtime/nrt.c index 534681d5417..9e2dc02dba4 100644 --- a/numba/core/runtime/nrt.c +++ b/numba/core/runtime/nrt.c @@ -19,6 +19,7 @@ struct MemInfo { void *dtor_info; void *data; size_t size; /* only used for NRT allocated memory */ + NRT_ExternalAllocator *external_allocator; }; @@ -170,13 +171,15 @@ void NRT_MemSys_set_atomic_cas_stub(void) { */ void NRT_MemInfo_init(NRT_MemInfo *mi,void *data, size_t size, - NRT_dtor_function dtor, void *dtor_info) + NRT_dtor_function dtor, void *dtor_info, + NRT_ExternalAllocator *external_allocator) { mi->refct = 1; /* starts with 1 refct */ mi->dtor = dtor; mi->dtor_info = dtor_info; mi->data = data; mi->size = size; + mi->external_allocator = external_allocator; /* Update stats */ TheMSys.atomic_inc(&TheMSys.stats_mi_alloc); } @@ -184,8 +187,8 @@ void NRT_MemInfo_init(NRT_MemInfo *mi,void *data, size_t size, NRT_MemInfo *NRT_MemInfo_new(void *data, size_t size, NRT_dtor_function dtor, void *dtor_info) { - NRT_MemInfo *mi = NRT_Allocate(sizeof(NRT_MemInfo)); - NRT_MemInfo_init(mi, data, size, dtor, dtor_info); + NRT_MemInfo *mi = NRT_Allocate(sizeof(NRT_MemInfo), NULL); + NRT_MemInfo_init(mi, data, size, dtor, dtor_info, NULL); return mi; } @@ -206,9 +209,9 @@ void nrt_internal_dtor_safe(void *ptr, size_t size, void *info) { } static -void *nrt_allocate_meminfo_and_data(size_t size, NRT_MemInfo **mi_out) { +void *nrt_allocate_meminfo_and_data(size_t size, NRT_MemInfo **mi_out, NRT_ExternalAllocator *allocator) { NRT_MemInfo *mi; - char *base = NRT_Allocate(sizeof(NRT_MemInfo) + size); + char *base = NRT_Allocate(sizeof(NRT_MemInfo) + size, allocator); mi = (NRT_MemInfo *) base; *mi_out = mi; return base + sizeof(NRT_MemInfo); @@ -230,9 +233,17 @@ void 
nrt_internal_custom_dtor_safe(void *ptr, size_t size, void *info) { NRT_MemInfo *NRT_MemInfo_alloc(size_t size) { NRT_MemInfo *mi; - void *data = nrt_allocate_meminfo_and_data(size, &mi); + void *data = nrt_allocate_meminfo_and_data(size, &mi, NULL); NRT_Debug(nrt_debug_print("NRT_MemInfo_alloc %p\n", data)); - NRT_MemInfo_init(mi, data, size, NULL, NULL); + NRT_MemInfo_init(mi, data, size, NULL, NULL, NULL); + return mi; +} + +NRT_MemInfo *NRT_MemInfo_alloc_external(size_t size, NRT_ExternalAllocator *allocator) { + NRT_MemInfo *mi; + void *data = nrt_allocate_meminfo_and_data(size, &mi, allocator); + NRT_Debug(nrt_debug_print("NRT_MemInfo_alloc %p\n", data)); + NRT_MemInfo_init(mi, data, size, NULL, NULL, allocator); return mi; } @@ -242,22 +253,22 @@ NRT_MemInfo *NRT_MemInfo_alloc_safe(size_t size) { NRT_MemInfo* NRT_MemInfo_alloc_dtor_safe(size_t size, NRT_dtor_function dtor) { NRT_MemInfo *mi; - void *data = nrt_allocate_meminfo_and_data(size, &mi); + void *data = nrt_allocate_meminfo_and_data(size, &mi, NULL); /* Only fill up a couple cachelines with debug markers, to minimize overhead. 
*/ memset(data, 0xCB, MIN(size, 256)); NRT_Debug(nrt_debug_print("NRT_MemInfo_alloc_dtor_safe %p %zu\n", data, size)); - NRT_MemInfo_init(mi, data, size, nrt_internal_custom_dtor_safe, dtor); + NRT_MemInfo_init(mi, data, size, nrt_internal_custom_dtor_safe, dtor, NULL); return mi; } static void *nrt_allocate_meminfo_and_data_align(size_t size, unsigned align, - NRT_MemInfo **mi) + NRT_MemInfo **mi, NRT_ExternalAllocator *allocator) { size_t offset, intptr, remainder; - char *base = nrt_allocate_meminfo_and_data(size + 2 * align, mi); + char *base = nrt_allocate_meminfo_and_data(size + 2 * align, mi, allocator); intptr = (size_t) base; /* See if we are aligned */ remainder = intptr % align; @@ -271,26 +282,46 @@ void *nrt_allocate_meminfo_and_data_align(size_t size, unsigned align, NRT_MemInfo *NRT_MemInfo_alloc_aligned(size_t size, unsigned align) { NRT_MemInfo *mi; - void *data = nrt_allocate_meminfo_and_data_align(size, align, &mi); + void *data = nrt_allocate_meminfo_and_data_align(size, align, &mi, NULL); NRT_Debug(nrt_debug_print("NRT_MemInfo_alloc_aligned %p\n", data)); - NRT_MemInfo_init(mi, data, size, NULL, NULL); + NRT_MemInfo_init(mi, data, size, NULL, NULL, NULL); return mi; } NRT_MemInfo *NRT_MemInfo_alloc_safe_aligned(size_t size, unsigned align) { NRT_MemInfo *mi; - void *data = nrt_allocate_meminfo_and_data_align(size, align, &mi); + void *data = nrt_allocate_meminfo_and_data_align(size, align, &mi, NULL); + /* Only fill up a couple cachelines with debug markers, to minimize + overhead. 
+ */ + memset(data, 0xCB, MIN(size, 256)); + NRT_Debug(nrt_debug_print("NRT_MemInfo_alloc_safe_aligned %p %zu\n", + data, size)); + NRT_MemInfo_init(mi, data, size, nrt_internal_dtor_safe, (void*)size, NULL); + return mi; +} + +NRT_MemInfo *NRT_MemInfo_alloc_safe_aligned_external(size_t size, unsigned align, NRT_ExternalAllocator *allocator) { NRT_MemInfo *mi; - void *data = nrt_allocate_meminfo_and_data_align(size, align, &mi); + void *data = nrt_allocate_meminfo_and_data_align(size, align, &mi, allocator); /* Only fill up a couple cachelines with debug markers, to minimize overhead. */ memset(data, 0xCB, MIN(size, 256)); NRT_Debug(nrt_debug_print("NRT_MemInfo_alloc_safe_aligned %p %zu\n", data, size)); - NRT_MemInfo_init(mi, data, size, nrt_internal_dtor_safe, (void*)size); + NRT_MemInfo_init(mi, data, size, nrt_internal_dtor_safe, (void*)size, allocator); return mi; } +void NRT_dealloc(NRT_MemInfo *mi) { + if (mi->external_allocator) { + mi->external_allocator->free(mi, mi->external_allocator->opaque_data); + } else { + NRT_Free(mi); + } +} + void NRT_MemInfo_destroy(NRT_MemInfo *mi) { - NRT_Free(mi); + NRT_dealloc(mi); TheMSys.atomic_inc(&TheMSys.stats_mi_free); } @@ -352,7 +383,7 @@ nrt_varsize_dtor(void *ptr, size_t size, void *info) { NRT_MemInfo *NRT_MemInfo_new_varsize(size_t size) { NRT_MemInfo *mi; - void *data = NRT_Allocate(size); + void *data = NRT_Allocate(size, NULL); if (data == NULL) return NULL; @@ -377,7 +408,7 @@ void *NRT_MemInfo_varsize_alloc(NRT_MemInfo *mi, size_t size) "with a non varsize-allocated meminfo"); return NULL; /* unreachable */ } - mi->data = NRT_Allocate(size); + mi->data = NRT_Allocate(size, NULL); if (mi->data == NULL) return NULL; mi->size = size; @@ -413,8 +444,13 @@ void NRT_MemInfo_varsize_free(NRT_MemInfo *mi, void *ptr) * Low-level allocation wrappers.
*/ -void* NRT_Allocate(size_t size) { - void *ptr = TheMSys.allocator.malloc(size); +void* NRT_Allocate(size_t size, NRT_ExternalAllocator *allocator) { + void *ptr; + if (allocator) { + ptr = allocator->malloc(size, allocator->opaque_data); + } else { + ptr = TheMSys.allocator.malloc(size); + } NRT_Debug(nrt_debug_print("NRT_Allocate bytes=%zu ptr=%p\n", size, ptr)); TheMSys.atomic_inc(&TheMSys.stats_alloc); return ptr; @@ -460,6 +496,7 @@ NRT_MemInfo* nrt_manage_memory(void *data, NRT_managed_dtor dtor) { static const NRT_api_functions nrt_functions_table = { NRT_MemInfo_alloc, + NRT_MemInfo_alloc_external, nrt_manage_memory, NRT_MemInfo_acquire, NRT_MemInfo_release, diff --git a/numba/core/runtime/nrt.h b/numba/core/runtime/nrt.h index 3c74dc58f58..77d5d908b6f 100644 --- a/numba/core/runtime/nrt.h +++ b/numba/core/runtime/nrt.h @@ -22,6 +22,7 @@ All functions described here are threadsafe. /* TypeDefs */ typedef void (*NRT_dtor_function)(void *ptr, size_t size, void *info); +typedef void (*NRT_dealloc_func)(void *ptr, void *dealloc_info); typedef size_t (*NRT_atomic_inc_dec_func)(size_t *ptr); typedef int (*NRT_atomic_cas_func)(void * volatile *ptr, void *cmp, void *repl, void **oldptr); @@ -32,7 +33,6 @@ typedef void *(*NRT_malloc_func)(size_t size); typedef void *(*NRT_realloc_func)(void *ptr, size_t new_size); typedef void (*NRT_free_func)(void *ptr); - /* Memory System API */ /* Initialize the memory system */ @@ -101,7 +101,8 @@ NRT_MemInfo* NRT_MemInfo_new(void *data, size_t size, VISIBILITY_HIDDEN void NRT_MemInfo_init(NRT_MemInfo *mi, void *data, size_t size, - NRT_dtor_function dtor, void *dtor_info); + NRT_dtor_function dtor, void *dtor_info, + NRT_ExternalAllocator *external_allocator); /* * Returns the refcount of a MemInfo or (size_t)-1 if error. 
@@ -116,6 +117,8 @@ size_t NRT_MemInfo_refcount(NRT_MemInfo *mi); VISIBILITY_HIDDEN NRT_MemInfo *NRT_MemInfo_alloc(size_t size); +NRT_MemInfo *NRT_MemInfo_alloc_external(size_t size, NRT_ExternalAllocator *allocator); + /* * The "safe" NRT_MemInfo_alloc performs additional steps to help debug * memory errors. @@ -141,6 +144,8 @@ NRT_MemInfo *NRT_MemInfo_alloc_aligned(size_t size, unsigned align); VISIBILITY_HIDDEN NRT_MemInfo *NRT_MemInfo_alloc_safe_aligned(size_t size, unsigned align); +NRT_MemInfo *NRT_MemInfo_alloc_safe_aligned_external(size_t size, unsigned align, NRT_ExternalAllocator *allocator); + /* * Internal API. * Release a MemInfo. Calls NRT_MemSys_insert_meminfo. @@ -206,7 +211,7 @@ void NRT_MemInfo_dump(NRT_MemInfo *mi, FILE *out); /* * Allocate memory of `size` bytes. */ -VISIBILITY_HIDDEN void* NRT_Allocate(size_t size); +VISIBILITY_HIDDEN void* NRT_Allocate(size_t size, NRT_ExternalAllocator *allocator); /* * Deallocate memory pointed by `ptr`. diff --git a/numba/core/runtime/nrt_external.h b/numba/core/runtime/nrt_external.h index 391b6fa1b0e..a4835c36f67 100644 --- a/numba/core/runtime/nrt_external.h +++ b/numba/core/runtime/nrt_external.h @@ -7,6 +7,18 @@ typedef struct MemInfo NRT_MemInfo; typedef void NRT_managed_dtor(void *data); +typedef void *(*NRT_external_malloc_func)(size_t size, void *opaque_data); +typedef void *(*NRT_external_realloc_func)(void *ptr, size_t new_size, void *opaque_data); +typedef void (*NRT_external_free_func)(void *ptr, void *opaque_data); + +struct ExternalMemAllocator { + NRT_external_malloc_func malloc; + NRT_external_realloc_func realloc; + NRT_external_free_func free; + void *opaque_data; +}; + +typedef struct ExternalMemAllocator NRT_ExternalAllocator; typedef struct { /* Methods to create MemInfos. @@ -21,6 +33,10 @@ typedef struct { Returning a new reference. */ NRT_MemInfo* (*allocate)(size_t nbytes); + /* Allocator memory using an external allocator but still using Numba's MemInfo. 
+ + */ + NRT_MemInfo* (*allocate_external)(size_t nbytes, NRT_ExternalAllocator *allocator); /* Convert externally allocated memory into a MemInfo. From 765d8fa25d26fe2a33c98a4178b81b49cb876afa Mon Sep 17 00:00:00 2001 From: "Todd A. Anderson" Date: Sat, 19 Sep 2020 23:53:38 -0500 Subject: [PATCH 05/23] Add some testing code for dparray but this isn't integrated with the test suite correctly yet. --- numba/dppl/tests/dppl/test_dparray.py | 93 +++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 numba/dppl/tests/dppl/test_dparray.py diff --git a/numba/dppl/tests/dppl/test_dparray.py b/numba/dppl/tests/dppl/test_dparray.py new file mode 100644 index 00000000000..1b49d10e019 --- /dev/null +++ b/numba/dppl/tests/dppl/test_dparray.py @@ -0,0 +1,93 @@ +from __future__ import print_function, division, absolute_import + +import numba +import numba.dppl.dparray as np +import numpy + +def p1(a): + return a * 2.0 + 13 + +f1 = numba.njit(p1) + +@numba.njit() +def f2(a): + return a + +@numba.njit() +def f3(a, b): + return a * np.ndarray(b.shape, b.dtype, b) + +@numba.njit() +def f4(): + return np.ones(10) + +def p5(a, b): + return a * b + +f5 = numba.njit(p5) + +@numba.njit() +def f6(a): + return a + 13 + +print("Testing Python Numpy") +z1 = numpy.ones(10) +z2 = p1(z1) +print("z2:", z2, type(z2)) +assert(isinstance(z2, numpy.ndarray)) + +print("Testing Numba Numpy") +z1 = numpy.ones(10) +z2 = f1(z1) +print("z2:", z2, type(z2)) +assert(isinstance(z2, numpy.ndarray)) + +print("Testing dparray ones") +a = np.ones(10) +print("a:", a, type(a)) +assert(isinstance(a, np.ndarray)) + +print("Testing dparray multiplication") +c = a * 5 +print("c", c, type(c)) +assert(isinstance(c, np.ndarray)) + +print("Testing Python dparray") +b = p1(c) +print("b:", b, type(b)) +assert(isinstance(b, np.ndarray)) + +print("Testing Numba dparray") +b = f1(c) +print("b:", b, type(b)) +assert(isinstance(b, np.ndarray)) + +print("Testing Numba dparray 2") +d = f2(a) +print("d:", 
d, type(d)) +assert(isinstance(b, np.ndarray)) + +print("Testing Numba dparray constructor from numpy.ndarray") +e = f3(a, z1) +print("e:", e, type(e)) +assert(isinstance(e, np.ndarray)) + +print("Testing Numba dparray functions") +f = f4() +print("f:", f, type(f)) +assert(isinstance(f, np.ndarray)) + +print("Testing Python mixing dparray and numpy.ndarray") +h = p5(a, z1) +print("h:", h, type(h)) +assert(isinstance(h, np.ndarray)) + +print("Testing Numba mixing dparray and numpy.ndarray") +h = f5(a, z1) +print("h:", h, type(h)) +assert(isinstance(h, np.ndarray)) + +print("Testing Numba mixing dparray and constant") +g = f6(a) +print("g:", g, type(g)) +assert(isinstance(g, np.ndarray)) From 42ff1740252a9860ae9106ff368805690e2ed6a4 Mon Sep 17 00:00:00 2001 From: "todd.a.anderson" Date: Tue, 18 Aug 2020 16:31:31 -0500 Subject: [PATCH 06/23] Initial support for Numpy ndarray subclasses. --- numba/_typeof.c | 13 ++++++++++++- numba/core/extending.py | 2 +- numba/core/ir_utils.py | 12 ++++++++---- numba/core/pythonapi.py | 9 ++++++++- numba/core/runtime/_nrt_python.c | 4 ++-- numba/core/types/npytypes.py | 4 +++- numba/core/typing/npydecl.py | 18 ++++++++++++++++-- numba/np/arrayobj.py | 11 ++++++++--- 8 files changed, 58 insertions(+), 15 deletions(-) diff --git a/numba/_typeof.c b/numba/_typeof.c index 242150cccd0..2fd274f832f 100644 --- a/numba/_typeof.c +++ b/numba/_typeof.c @@ -767,6 +767,7 @@ int typeof_typecode(PyObject *dispatcher, PyObject *val) { PyTypeObject *tyobj = Py_TYPE(val); + int no_subtype_attr; /* This needs to be kept in sync with Dispatcher.typeof_pyval(), * otherwise funny things may happen. 
*/ @@ -793,9 +794,19 @@ typeof_typecode(PyObject *dispatcher, PyObject *val) return typecode_arrayscalar(dispatcher, val); } /* Array handling */ - else if (PyType_IsSubtype(tyobj, &PyArray_Type)) { + else if (tyobj == &PyArray_Type) { return typecode_ndarray(dispatcher, (PyArrayObject*)val); } + /* Subtypes of Array handling */ + else if (PyType_IsSubtype(tyobj, &PyArray_Type)) { + /* If the class has an attribute named __numba_no_subtype_ndarray then + don't treat it as a normal variant of a Numpy ndarray but as it's own + separate type. */ + no_subtype_attr = PyObject_HasAttrString(val, "__numba_no_subtype_ndarray__"); + if (!no_subtype_attr) { + return typecode_ndarray(dispatcher, (PyArrayObject*)val); + } + } return typecode_using_fingerprint(dispatcher, val); } diff --git a/numba/core/extending.py b/numba/core/extending.py index 71fe22829b8..e5bbf2738c5 100644 --- a/numba/core/extending.py +++ b/numba/core/extending.py @@ -14,7 +14,7 @@ lower_setattr, lower_setattr_generic, lower_cast) # noqa: F401 from numba.core.datamodel import models # noqa: F401 from numba.core.datamodel import register_default as register_model # noqa: F401, E501 -from numba.core.pythonapi import box, unbox, reflect, NativeValue # noqa: F401 +from numba.core.pythonapi import box, unbox, reflect, NativeValue, allocator # noqa: F401 from numba._helperlib import _import_cython_function # noqa: F401 diff --git a/numba/core/ir_utils.py b/numba/core/ir_utils.py index 1c5e58b8d21..21bc98d6d47 100644 --- a/numba/core/ir_utils.py +++ b/numba/core/ir_utils.py @@ -64,6 +64,8 @@ def mk_alloc(typemap, calltypes, lhs, size_var, dtype, scope, loc): out = [] ndims = 1 size_typ = types.intp + # Get the type of the array being allocated. 
+ arr_typ = typemap[lhs.name] if isinstance(size_var, tuple): if len(size_var) == 1: size_var = size_var[0] @@ -108,11 +110,13 @@ def mk_alloc(typemap, calltypes, lhs, size_var, dtype, scope, loc): typ_var_assign = ir.Assign(np_typ_getattr, typ_var, loc) alloc_call = ir.Expr.call(attr_var, [size_var, typ_var], (), loc) if calltypes: - calltypes[alloc_call] = typemap[attr_var.name].get_call_type( + cac = typemap[attr_var.name].get_call_type( typing.Context(), [size_typ, types.functions.NumberClass(dtype)], {}) - # signature( - # types.npytypes.Array(dtype, ndims, 'C'), size_typ, - # types.functions.NumberClass(dtype)) + # By default, all calls to "empty" are typed as returning a standard + # Numpy ndarray. If we are allocating a ndarray subclass here then + # just change the return type to be that of the subclass. + cac._return_type = arr_typ + calltypes[alloc_call] = cac alloc_assign = ir.Assign(alloc_call, lhs, loc) out.extend([g_np_assign, attr_assign, typ_var_assign, alloc_assign]) diff --git a/numba/core/pythonapi.py b/numba/core/pythonapi.py index fd5e39b37e6..10499bb3350 100644 --- a/numba/core/pythonapi.py +++ b/numba/core/pythonapi.py @@ -43,10 +43,13 @@ def lookup(self, typeclass, default=None): _boxers = _Registry() _unboxers = _Registry() _reflectors = _Registry() +# Registry of special allocators for types. +_allocators = _Registry() box = _boxers.register unbox = _unboxers.register reflect = _reflectors.register +allocator = _allocators.register class _BoxContext(namedtuple("_BoxContext", ("context", "builder", "pyapi", "env_manager"))): @@ -1163,8 +1166,11 @@ def nrt_adapt_ndarray_to_python(self, aryty, ary, dtypeptr): assert self.context.enable_nrt, "NRT required" intty = ir.IntType(32) + # Embed the Python type of the array (maybe subclass) in the LLVM. 
+ serial_aryty_pytype = self.unserialize(self.serialize_object(aryty.PyType)) + fnty = Type.function(self.pyobj, - [self.voidptr, intty, intty, self.pyobj]) + [self.voidptr, self.pyobj, intty, intty, self.pyobj]) fn = self._get_function(fnty, name="NRT_adapt_ndarray_to_python") fn.args[0].add_attribute(lc.ATTR_NO_CAPTURE) @@ -1174,6 +1180,7 @@ def nrt_adapt_ndarray_to_python(self, aryty, ary, dtypeptr): aryptr = cgutils.alloca_once_value(self.builder, ary) return self.builder.call(fn, [self.builder.bitcast(aryptr, self.voidptr), + serial_aryty_pytype, ndim, writable, dtypeptr]) def nrt_meminfo_new_from_pyobject(self, data, pyobj): diff --git a/numba/core/runtime/_nrt_python.c b/numba/core/runtime/_nrt_python.c index 97f7f19efb3..8a891518571 100644 --- a/numba/core/runtime/_nrt_python.c +++ b/numba/core/runtime/_nrt_python.c @@ -285,7 +285,7 @@ PyObject* try_to_return_parent(arystruct_t *arystruct, int ndim, } NUMBA_EXPORT_FUNC(PyObject *) -NRT_adapt_ndarray_to_python(arystruct_t* arystruct, int ndim, +NRT_adapt_ndarray_to_python(arystruct_t* arystruct, PyTypeObject *retty, int ndim, int writeable, PyArray_Descr *descr) { PyArrayObject *array; @@ -329,7 +329,7 @@ NRT_adapt_ndarray_to_python(arystruct_t* arystruct, int ndim, shape = arystruct->shape_and_strides; strides = shape + ndim; Py_INCREF((PyObject *) descr); - array = (PyArrayObject *) PyArray_NewFromDescr(&PyArray_Type, descr, ndim, + array = (PyArrayObject *) PyArray_NewFromDescr(retty, descr, ndim, shape, strides, arystruct->data, flags, (PyObject *) miobj); diff --git a/numba/core/types/npytypes.py b/numba/core/types/npytypes.py index 6f6307c5526..d3b5bb7d3b9 100644 --- a/numba/core/types/npytypes.py +++ b/numba/core/types/npytypes.py @@ -8,6 +8,7 @@ from numba.core import utils from .misc import UnicodeType from .containers import Bytes +import numpy as np class CharSeq(Type): """ @@ -394,8 +395,9 @@ class Array(Buffer): Type class for Numpy arrays. 
""" - def __init__(self, dtype, ndim, layout, readonly=False, name=None, + def __init__(self, dtype, ndim, layout, PyType=np.ndarray, readonly=False, name=None, aligned=True, addrspace=None): + self.PyType = PyType if readonly: self.mutable = False if (not aligned or diff --git a/numba/core/typing/npydecl.py b/numba/core/typing/npydecl.py index 7e11e1dfa61..9ce15d1a5ff 100644 --- a/numba/core/typing/npydecl.py +++ b/numba/core/typing/npydecl.py @@ -130,7 +130,21 @@ def generic(self, args, kws): ret_tys = ufunc_loop.outputs[-implicit_output_count:] if ndims > 0: assert layout is not None - ret_tys = [types.Array(dtype=ret_ty, ndim=ndims, layout=layout) + # If either of the types involved in the ufunc operation have a + # __array_ufunc__ method then invoke the first such one to + # determine the output type of the ufunc. + array_ufunc_type = None + for a in args: + if hasattr(a, "__array_ufunc__"): + array_ufunc_type = a + break + output_type = types.Array + if array_ufunc_type is not None: + output_type = array_ufunc_type.__array_ufunc__(ufunc, "__call__", *args, **kws) + # Eventually better error handling! FIX ME! + assert(output_type is not None) + + ret_tys = [output_type(dtype=ret_ty, ndim=ndims, layout=layout) for ret_ty in ret_tys] ret_tys = [resolve_output_type(self.context, args, ret_ty) for ret_ty in ret_tys] @@ -138,7 +152,7 @@ def generic(self, args, kws): # note: although the previous code should support multiple return values, only one # is supported as of now (signature may not support more than one). 
- # there is an check enforcing only one output + # there is a check enforcing only one output out.extend(args) return signature(*out) diff --git a/numba/np/arrayobj.py b/numba/np/arrayobj.py index 933b1c6565e..5749e7d9b5b 100644 --- a/numba/np/arrayobj.py +++ b/numba/np/arrayobj.py @@ -32,7 +32,7 @@ from numba.misc import quicksort, mergesort from numba.cpython import slicing from numba.cpython.unsafe.tuple import tuple_setitem - +from numba.core.pythonapi import _allocators def set_range_metadata(builder, load, lower_bound, upper_bound): """ @@ -3399,8 +3399,13 @@ def _empty_nd_impl(context, builder, arrtype, shapes): ) align = context.get_preferred_array_alignment(arrtype.dtype) - meminfo = context.nrt.meminfo_alloc_aligned(builder, size=allocsize, - align=align) + def alloc_unsupported(context, builder, size, align): + return context.nrt.meminfo_alloc_aligned(builder, size, align) + + # See if the type has a special allocator, if not use the default + # alloc_unsuppported allocator above. + allocator_impl = _allocators.lookup(arrtype.__class__, alloc_unsupported) + meminfo = allocator_impl(context, builder, size=allocsize, align=align) data = context.nrt.meminfo_data(builder, meminfo) From f07879bb405a381c8633274e745a3d1df90e9442 Mon Sep 17 00:00:00 2001 From: "Todd A. Anderson" Date: Mon, 24 Aug 2020 18:29:43 -0500 Subject: [PATCH 07/23] PyType to py_type --- numba/core/pythonapi.py | 2 +- numba/core/types/npytypes.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/numba/core/pythonapi.py b/numba/core/pythonapi.py index 10499bb3350..2223a179a81 100644 --- a/numba/core/pythonapi.py +++ b/numba/core/pythonapi.py @@ -1167,7 +1167,7 @@ def nrt_adapt_ndarray_to_python(self, aryty, ary, dtypeptr): intty = ir.IntType(32) # Embed the Python type of the array (maybe subclass) in the LLVM. 
- serial_aryty_pytype = self.unserialize(self.serialize_object(aryty.PyType)) + serial_aryty_pytype = self.unserialize(self.serialize_object(aryty.py_type)) fnty = Type.function(self.pyobj, [self.voidptr, self.pyobj, intty, intty, self.pyobj]) diff --git a/numba/core/types/npytypes.py b/numba/core/types/npytypes.py index d3b5bb7d3b9..3c2191ca23e 100644 --- a/numba/core/types/npytypes.py +++ b/numba/core/types/npytypes.py @@ -395,9 +395,9 @@ class Array(Buffer): Type class for Numpy arrays. """ - def __init__(self, dtype, ndim, layout, PyType=np.ndarray, readonly=False, name=None, + def __init__(self, dtype, ndim, layout, py_type=np.ndarray, readonly=False, name=None, aligned=True, addrspace=None): - self.PyType = PyType + self.py_type = py_type if readonly: self.mutable = False if (not aligned or From fdab21135b97c829a69dfcfc88d97276dbd828e1 Mon Sep 17 00:00:00 2001 From: "Todd A. Anderson" Date: Sat, 19 Sep 2020 23:32:15 -0500 Subject: [PATCH 08/23] Implementation of USM-backed ndarray as dparray. 
--- numba/dppl/dparray.py | 293 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 293 insertions(+) create mode 100644 numba/dppl/dparray.py diff --git a/numba/dppl/dparray.py b/numba/dppl/dparray.py new file mode 100644 index 00000000000..cd6652c2fc4 --- /dev/null +++ b/numba/dppl/dparray.py @@ -0,0 +1,293 @@ +#from ._ndarray_utils import _transmogrify +import numpy as np +from inspect import getmembers, isfunction, isclass +from numbers import Number +import numba +from numba import types +from numba.extending import typeof_impl, register_model, type_callable, lower_builtin +from numba.np import numpy_support +from numba.core.pythonapi import box, allocator +from llvmlite import ir +import llvmlite.binding as llb +from numba.core import types, cgutils +import builtins +import sys +from ctypes.util import find_library +import dppl +from dppl._memory import MemoryUSMShared + +flib = find_library('mkl_intel_ilp64') +print("flib:", flib) +llb.load_library_permanently(flib) + +functions_list = [o for o in getmembers(np) if isfunction(o[1])] +class_list = [o for o in getmembers(np) if isclass(o[1])] + +class ndarray(np.ndarray): + """ + numpy.ndarray subclass whose underlying memory buffer is allocated + with a foreign allocator. + """ + def __new__(subtype, shape, + dtype=float, buffer=None, offset=0, + strides=None, order=None): + # Create a new array. 
+ if buffer is None: + nelems = np.prod(shape) + dt = np.dtype(dtype) + isz = dt.itemsize + buf = MemoryUSMShared(nbytes=isz*max(1,nelems)) + return np.ndarray.__new__( + subtype, shape, dtype=dt, + buffer=buf, offset=0, + strides=strides, order=order) + # zero copy if buffer is a usm backed array-like thing + elif hasattr(buffer, '__sycl_usm_array_interface__'): + # also check for array interface + return np.ndarray.__new__( + subtype, shape, dtype=dt, + buffer=buffer, offset=offset, + strides=strides, order=order) + else: + # must copy + ar = np.ndarray(shape, + dtype=dtype, buffer=buffer, + offset=offset, strides=strides, + order=order) + buf = MemoryUSMShared(nbytes=ar.nbytes) + res = np.ndarray.__new__( + subtype, shape, dtype=dtype, + buffer=buf, offset=0, + strides=strides, order=order) + np.copyto(res, ar, casting='no') + return res + + def __array_finalize__(self, obj): + # When called from the explicit constructor, obj is None + if obj is None: return + # When called in new-from-template, `obj` is another instance of our own + # subclass, that we might use to update the new `self` instance. + # However, when called from view casting, `obj` can be an instance of any + # subclass of ndarray, including our own. + if hasattr(obj, '__sycl_usm_array_interface__'): + return + if isinstance(obj, np.ndarray): + ob = self + while isinstance(ob, np.ndarray): + if hasattr(obj, '__sycl_usm_array_interface__'): + return + ob = ob.base + + # trace if self has underlying mkl_mem buffer +# ob = self.base + +# while isinstance(ob, ndarray): +# ob = ob.base +# if isinstance(ob, dppl.Memory): +# return + + # Just raise an exception since __array_ufunc__ makes all reasonable cases not + # need the code below. 
+ raise ValueError("Non-MKL allocated ndarray can not viewed as MKL-allocated one without a copy") + + """ + # since dparray must have mkl_memory underlying it, a copy must be made + newbuf = dppl.Memory(nbytes=self.data.nbytes) + new_arr = np.ndarray.__new__( + type(self), + self.shape, + buffer=newbuf, offset=0, + dtype=self.dtype, + strides=self.strides) + np.copyto(new_arr, self) + # We need to modify self to now be mkl_memory-backed ndarray + # We only need to change data and base, but these are not writeable. + # + # Modification can not be done by simply setting self either, + # as self is just a local copy of the instance. + # + # raise ValueError("Non-MKL allocated ndarray can not viewed as MKL-allocated one without a copy") + # Will probably have to raise an exception soon as Numpy may disallow this. + _transmogrify(self, new_arr) + """ + + __numba_no_subtype_ndarray__ = True + + def from_ndarray(x): + return ndarray(x.shape, x.dtype, x) + + def as_ndarray(self): + return np.ndarray(self.shape, self.dtype, self) + + def __array__(self): + return self + + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + if method == '__call__': + N = None + scalars = [] + for inp in inputs: + if isinstance(inp, Number): + scalars.append(inp) + elif isinstance(inp, (self.__class__, np.ndarray)): + if isinstance(inp, self.__class__): + scalars.append(np.ndarray(inp.shape, inp.dtype, inp)) + else: + scalars.append(inp) + if N is not None: + if N != inp.shape: + raise TypeError("inconsistent sizes") + else: + N = inp.shape + else: + return NotImplemented + if kwargs.get('out', None) is None: + # maybe copy? 
+ # deal with multiple returned arrays, so kwargs['out'] can be tuple + kwargs['out'] = empty(inputs[0].shape, dtype=get_ret_type_from_ufunc(ufunc)) + ret = ufunc(*scalars, **kwargs) + return ret +# return self.__class__(ret.shape, ret.dtype, ret) + else: + return NotImplemented + +for c in class_list: + cname = c[0] + new_func = "class %s(np.%s):\n" % (cname, cname) + if cname == "ndarray": + # Implemented explicitly above. + continue + else: + # This is temporary. + new_func += " pass\n" + # The code below should eventually be made to work and used. +# new_func += " @classmethod\n" +# new_func += " def cast(cls, some_np_obj):\n" +# new_func += " some_np_obj.__class__ = cls\n" +# new_func += " return some_np_obj\n" + try: + the_code = compile(new_func, '__init__', 'exec') + exec(the_code) + except: + pass + +# Redefine all Numpy functions in this module and if they +# return a Numpy array, transform that to a USM-backed array +# instead. This is a stop-gap. We should eventually find a +# way to do the allocation correct to start with. +for f in functions_list: + fname = f[0] + new_func = "def %s(*args, **kwargs):\n" % fname + new_func += " ret = np.%s(*args, **kwargs)\n" % fname + new_func += " if type(ret) == np.ndarray:\n" + new_func += " ret = ndarray(ret.shape, ret.dtype, ret)\n" + new_func += " return ret\n" + the_code = compile(new_func, '__init__', 'exec') + exec(the_code) + +# This class creates a type in Numba. +class DPArrayType(types.Array): + def __init__(self, dtype, ndim, layout, readonly=False, name=None, + aligned=True, addrspace=None): + # This name defines how this type will be shown in Numba's type dumps. + name = "DPArray:ndarray(%s, %sd, %s)" % (dtype, ndim, layout) + super(DPArrayType, self).__init__(dtype, ndim, layout, + py_type=ndarray, + readonly=readonly, + name=name, + addrspace=addrspace) + + # Tell Numba typing how to combine DPArrayType with other ndarray types. 
+ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + if method == '__call__': + for inp in inputs: + if not isinstance(inp, (DPArrayType, types.Array, types.Number)): + return None + + return DPArrayType + else: + return None + +# This tells Numba how to create a DPArrayType when a dparray is passed +# into a njit function. +@typeof_impl.register(ndarray) +def typeof_ta_ndarray(val, c): + try: + dtype = numpy_support.from_dtype(val.dtype) + except NotImplementedError: + raise ValueError("Unsupported array dtype: %s" % (val.dtype,)) + layout = numpy_support.map_layout(val) + readonly = not val.flags.writeable + return DPArrayType(dtype, val.ndim, layout, readonly=readonly) + +# This tells Numba to use the default Numpy ndarray data layout for +# object of type DPArray. +register_model(DPArrayType)(numba.core.datamodel.models.ArrayModel) + +# This tells Numba how to type calls to a DPArray constructor. +@type_callable(ndarray) +def type_ndarray(context): + def typer(shape, ndim, buf): + return DPArrayType(buf.dtype, buf.ndim, buf.layout) + return typer + +# This tells Numba how to implement calls to a DPArray constructor. +@lower_builtin(ndarray, types.UniTuple, types.DType, types.Array) +def impl_ndarray(context, builder, sig, args): + # Need to allocate and copy here! + shape, ndim, buf = args + return buf + +# This tells Numba how to convert from its native representation +# of a DPArray in a njit function back to a Python DPArray. 
+@box(DPArrayType) +def box_array(typ, val, c): + nativearycls = c.context.make_array(typ) + nativeary = nativearycls(c.context, c.builder, value=val) + if c.context.enable_nrt: + np_dtype = numpy_support.as_dtype(typ.dtype) + dtypeptr = c.env_manager.read_const(c.env_manager.add_const(np_dtype)) + # Steals NRT ref + newary = c.pyapi.nrt_adapt_ndarray_to_python(typ, val, dtypeptr) + return newary + else: + parent = nativeary.parent + c.pyapi.incref(parent) + return parent + +# This tells Numba to use this function when it needs to allocate a +# DPArray in a njit function. +@allocator(DPArrayType) +def allocator_DPArray(context, builder, size, align): + print("allocator_DPArray") + sys.stdout.flush() + use_Numba_allocator = True + if use_Numba_allocator: + print("Using Numba allocator") + context.nrt._require_nrt() + + mod = builder.module + u32 = ir.IntType(32) + fnty = ir.FunctionType(cgutils.voidptr_t, [cgutils.intp_t, u32]) + fn = mod.get_or_insert_function(fnty, + name="NRT_MemInfo_alloc_safe_aligned") + fn.return_value.add_attribute("noalias") + if isinstance(align, builtins.int): + align = context.get_constant(types.uint32, align) + else: + assert align.type == u32, "align must be a uint32" + return builder.call(fn, [size, align]) + else: + print("Using mkl_malloc") + context.nrt._require_nrt() + + mod = builder.module + u32 = ir.IntType(32) + fnty = ir.FunctionType(cgutils.voidptr_t, [cgutils.intp_t, u32]) + fn = mod.get_or_insert_function(fnty, name="mkl_malloc") + fn.return_value.add_attribute("noalias") + if isinstance(align, builtins.int): + align = context.get_constant(types.uint32, align) + else: + assert align.type == u32, "align must be a uint32" + return builder.call(fn, [size, align]) From 390e679036f66c16a43e440f945d1f819b88d98c Mon Sep 17 00:00:00 2001 From: "Todd A. Anderson" Date: Sat, 19 Sep 2020 23:32:42 -0500 Subject: [PATCH 09/23] Initial implementation of custom allocator support. 
--- numba/core/runtime/nrt.c | 77 +++++++++++++++++++++++-------- numba/core/runtime/nrt.h | 11 +++-- numba/core/runtime/nrt_external.h | 16 +++++++ 3 files changed, 81 insertions(+), 23 deletions(-) diff --git a/numba/core/runtime/nrt.c b/numba/core/runtime/nrt.c index 534681d5417..9e2dc02dba4 100644 --- a/numba/core/runtime/nrt.c +++ b/numba/core/runtime/nrt.c @@ -19,6 +19,7 @@ struct MemInfo { void *dtor_info; void *data; size_t size; /* only used for NRT allocated memory */ + NRT_ExternalAllocator *external_allocator; }; @@ -170,13 +171,15 @@ void NRT_MemSys_set_atomic_cas_stub(void) { */ void NRT_MemInfo_init(NRT_MemInfo *mi,void *data, size_t size, - NRT_dtor_function dtor, void *dtor_info) + NRT_dtor_function dtor, void *dtor_info, + NRT_ExternalAllocator *external_allocator) { mi->refct = 1; /* starts with 1 refct */ mi->dtor = dtor; mi->dtor_info = dtor_info; mi->data = data; mi->size = size; + mi->external_allocator = external_allocator; /* Update stats */ TheMSys.atomic_inc(&TheMSys.stats_mi_alloc); } @@ -184,8 +187,8 @@ void NRT_MemInfo_init(NRT_MemInfo *mi,void *data, size_t size, NRT_MemInfo *NRT_MemInfo_new(void *data, size_t size, NRT_dtor_function dtor, void *dtor_info) { - NRT_MemInfo *mi = NRT_Allocate(sizeof(NRT_MemInfo)); - NRT_MemInfo_init(mi, data, size, dtor, dtor_info); + NRT_MemInfo *mi = NRT_Allocate(sizeof(NRT_MemInfo), NULL); + NRT_MemInfo_init(mi, data, size, dtor, dtor_info, NULL); return mi; } @@ -206,9 +209,9 @@ void nrt_internal_dtor_safe(void *ptr, size_t size, void *info) { } static -void *nrt_allocate_meminfo_and_data(size_t size, NRT_MemInfo **mi_out) { +void *nrt_allocate_meminfo_and_data(size_t size, NRT_MemInfo **mi_out, NRT_ExternalAllocator *allocator) { NRT_MemInfo *mi; - char *base = NRT_Allocate(sizeof(NRT_MemInfo) + size); + char *base = NRT_Allocate(sizeof(NRT_MemInfo) + size, allocator); mi = (NRT_MemInfo *) base; *mi_out = mi; return base + sizeof(NRT_MemInfo); @@ -230,9 +233,17 @@ void 
nrt_internal_custom_dtor_safe(void *ptr, size_t size, void *info) { NRT_MemInfo *NRT_MemInfo_alloc(size_t size) { NRT_MemInfo *mi; - void *data = nrt_allocate_meminfo_and_data(size, &mi); + void *data = nrt_allocate_meminfo_and_data(size, &mi, NULL); NRT_Debug(nrt_debug_print("NRT_MemInfo_alloc %p\n", data)); - NRT_MemInfo_init(mi, data, size, NULL, NULL); + NRT_MemInfo_init(mi, data, size, NULL, NULL, NULL); + return mi; +} + +NRT_MemInfo *NRT_MemInfo_alloc_external(size_t size, NRT_ExternalAllocator *allocator) { + NRT_MemInfo *mi; + void *data = nrt_allocate_meminfo_and_data(size, &mi, allocator); + NRT_Debug(nrt_debug_print("NRT_MemInfo_alloc %p\n", data)); + NRT_MemInfo_init(mi, data, size, NULL, NULL, allocator); return mi; } @@ -242,22 +253,22 @@ NRT_MemInfo *NRT_MemInfo_alloc_safe(size_t size) { NRT_MemInfo* NRT_MemInfo_alloc_dtor_safe(size_t size, NRT_dtor_function dtor) { NRT_MemInfo *mi; - void *data = nrt_allocate_meminfo_and_data(size, &mi); + void *data = nrt_allocate_meminfo_and_data(size, &mi, NULL); /* Only fill up a couple cachelines with debug markers, to minimize overhead. 
*/ memset(data, 0xCB, MIN(size, 256)); NRT_Debug(nrt_debug_print("NRT_MemInfo_alloc_dtor_safe %p %zu\n", data, size)); - NRT_MemInfo_init(mi, data, size, nrt_internal_custom_dtor_safe, dtor); + NRT_MemInfo_init(mi, data, size, nrt_internal_custom_dtor_safe, dtor, NULL); return mi; } static void *nrt_allocate_meminfo_and_data_align(size_t size, unsigned align, - NRT_MemInfo **mi) + NRT_MemInfo **mi, NRT_ExternalAllocator *allocator) { size_t offset, intptr, remainder; - char *base = nrt_allocate_meminfo_and_data(size + 2 * align, mi); + char *base = nrt_allocate_meminfo_and_data(size + 2 * align, mi, allocator); intptr = (size_t) base; /* See if we are aligned */ remainder = intptr % align; @@ -271,26 +282,46 @@ void *nrt_allocate_meminfo_and_data_align(size_t size, unsigned align, NRT_MemInfo *NRT_MemInfo_alloc_aligned(size_t size, unsigned align) { NRT_MemInfo *mi; - void *data = nrt_allocate_meminfo_and_data_align(size, align, &mi); + void *data = nrt_allocate_meminfo_and_data_align(size, align, &mi, NULL); NRT_Debug(nrt_debug_print("NRT_MemInfo_alloc_aligned %p\n", data)); - NRT_MemInfo_init(mi, data, size, NULL, NULL); + NRT_MemInfo_init(mi, data, size, NULL, NULL, NULL); return mi; } NRT_MemInfo *NRT_MemInfo_alloc_safe_aligned(size_t size, unsigned align) { NRT_MemInfo *mi; - void *data = nrt_allocate_meminfo_and_data_align(size, align, &mi); + void *data = nrt_allocate_meminfo_and_data_align(size, align, &mi, NULL); + /* Only fill up a couple cachelines with debug markers, to minimize + overhead. 
*/ + memset(data, 0xCB, MIN(size, 256)); + NRT_Debug(nrt_debug_print("NRT_MemInfo_alloc_safe_aligned %p %zu\n", + data, size)); + NRT_MemInfo_init(mi, data, size, nrt_internal_dtor_safe, (void*)size, NULL); + return mi; +} + +NRT_MemInfo *NRT_MemInfo_alloc_safe_aligned_external(size_t size, unsigned align, NRT_ExternalAllocator *allocator) { + NRT_MemInfo *mi; + void *data = nrt_allocate_meminfo_and_data_align(size, align, &mi, NULL); /* Only fill up a couple cachelines with debug markers, to minimize overhead. */ memset(data, 0xCB, MIN(size, 256)); NRT_Debug(nrt_debug_print("NRT_MemInfo_alloc_safe_aligned %p %zu\n", data, size)); - NRT_MemInfo_init(mi, data, size, nrt_internal_dtor_safe, (void*)size); + NRT_MemInfo_init(mi, data, size, nrt_internal_dtor_safe, (void*)size, allocator); return mi; } +void NRT_dealloc(NRT_MemInfo *mi) { + if (mi->external_allocator) { + mi->external_allocator->free(mi, mi->external_allocator->opaque_data); + } else { + NRT_Free(mi); + } +} + void NRT_MemInfo_destroy(NRT_MemInfo *mi) { - NRT_Free(mi); + NRT_dealloc(mi); TheMSys.atomic_inc(&TheMSys.stats_mi_free); } @@ -352,7 +383,7 @@ nrt_varsize_dtor(void *ptr, size_t size, void *info) { NRT_MemInfo *NRT_MemInfo_new_varsize(size_t size) { NRT_MemInfo *mi; - void *data = NRT_Allocate(size); + void *data = NRT_Allocate(size, NULL); if (data == NULL) return NULL; @@ -377,7 +408,7 @@ void *NRT_MemInfo_varsize_alloc(NRT_MemInfo *mi, size_t size) "with a non varsize-allocated meminfo"); return NULL; /* unreachable */ } - mi->data = NRT_Allocate(size); + mi->data = NRT_Allocate(size, NULL); if (mi->data == NULL) return NULL; mi->size = size; @@ -413,8 +444,13 @@ void NRT_MemInfo_varsize_free(NRT_MemInfo *mi, void *ptr) * Low-level allocation wrappers. 
*/ -void* NRT_Allocate(size_t size) { - void *ptr = TheMSys.allocator.malloc(size); +void* NRT_Allocate(size_t size, NRT_ExternalAllocator *allocator) { + void *ptr; + if (allocator) { + ptr = allocator->malloc(size, allocator->opaque_data); + } else { + ptr = TheMSys.allocator.malloc(size); + } NRT_Debug(nrt_debug_print("NRT_Allocate bytes=%zu ptr=%p\n", size, ptr)); TheMSys.atomic_inc(&TheMSys.stats_alloc); return ptr; @@ -460,6 +496,7 @@ NRT_MemInfo* nrt_manage_memory(void *data, NRT_managed_dtor dtor) { static const NRT_api_functions nrt_functions_table = { NRT_MemInfo_alloc, + NRT_MemInfo_alloc_external, nrt_manage_memory, NRT_MemInfo_acquire, NRT_MemInfo_release, diff --git a/numba/core/runtime/nrt.h b/numba/core/runtime/nrt.h index 3c74dc58f58..77d5d908b6f 100644 --- a/numba/core/runtime/nrt.h +++ b/numba/core/runtime/nrt.h @@ -22,6 +22,7 @@ All functions described here are threadsafe. /* TypeDefs */ typedef void (*NRT_dtor_function)(void *ptr, size_t size, void *info); +typedef void (*NRT_dealloc_func)(void *ptr, void *dealloc_info); typedef size_t (*NRT_atomic_inc_dec_func)(size_t *ptr); typedef int (*NRT_atomic_cas_func)(void * volatile *ptr, void *cmp, void *repl, void **oldptr); @@ -32,7 +33,6 @@ typedef void *(*NRT_malloc_func)(size_t size); typedef void *(*NRT_realloc_func)(void *ptr, size_t new_size); typedef void (*NRT_free_func)(void *ptr); - /* Memory System API */ /* Initialize the memory system */ @@ -101,7 +101,8 @@ NRT_MemInfo* NRT_MemInfo_new(void *data, size_t size, VISIBILITY_HIDDEN void NRT_MemInfo_init(NRT_MemInfo *mi, void *data, size_t size, - NRT_dtor_function dtor, void *dtor_info); + NRT_dtor_function dtor, void *dtor_info, + NRT_ExternalAllocator *external_allocator); /* * Returns the refcount of a MemInfo or (size_t)-1 if error. 
@@ -116,6 +117,8 @@ size_t NRT_MemInfo_refcount(NRT_MemInfo *mi); VISIBILITY_HIDDEN NRT_MemInfo *NRT_MemInfo_alloc(size_t size); +NRT_MemInfo *NRT_MemInfo_alloc_external(size_t size, NRT_ExternalAllocator *allocator); + /* * The "safe" NRT_MemInfo_alloc performs additional steps to help debug * memory errors. @@ -141,6 +144,8 @@ NRT_MemInfo *NRT_MemInfo_alloc_aligned(size_t size, unsigned align); VISIBILITY_HIDDEN NRT_MemInfo *NRT_MemInfo_alloc_safe_aligned(size_t size, unsigned align); +NRT_MemInfo *NRT_MemInfo_alloc_safe_aligned_external(size_t size, unsigned align, NRT_ExternalAllocator *allocator); + /* * Internal API. * Release a MemInfo. Calls NRT_MemSys_insert_meminfo. @@ -206,7 +211,7 @@ void NRT_MemInfo_dump(NRT_MemInfo *mi, FILE *out); /* * Allocate memory of `size` bytes. */ -VISIBILITY_HIDDEN void* NRT_Allocate(size_t size); +VISIBILITY_HIDDEN void* NRT_Allocate(size_t size, NRT_ExternalAllocator *allocator); /* * Deallocate memory pointed by `ptr`. diff --git a/numba/core/runtime/nrt_external.h b/numba/core/runtime/nrt_external.h index 391b6fa1b0e..a4835c36f67 100644 --- a/numba/core/runtime/nrt_external.h +++ b/numba/core/runtime/nrt_external.h @@ -7,6 +7,18 @@ typedef struct MemInfo NRT_MemInfo; typedef void NRT_managed_dtor(void *data); +typedef void *(*NRT_external_malloc_func)(size_t size, void *opaque_data); +typedef void *(*NRT_external_realloc_func)(void *ptr, size_t new_size, void *opaque_data); +typedef void (*NRT_external_free_func)(void *ptr, void *opaque_data); + +struct ExternalMemAllocator { + NRT_external_malloc_func malloc; + NRT_external_realloc_func realloc; + NRT_external_free_func free; + void *opaque_data; +}; + +typedef struct ExternalMemAllocator NRT_ExternalAllocator; typedef struct { /* Methods to create MemInfos. @@ -21,6 +33,10 @@ typedef struct { Returning a new reference. */ NRT_MemInfo* (*allocate)(size_t nbytes); + /* Allocator memory using an external allocator but still using Numba's MemInfo. 
+ + */ + NRT_MemInfo* (*allocate_external)(size_t nbytes, NRT_ExternalAllocator *allocator); /* Convert externally allocated memory into a MemInfo. From 9a271b2b4534dfa7874ccdbbcb109a34d5af757a Mon Sep 17 00:00:00 2001 From: "Todd A. Anderson" Date: Sat, 19 Sep 2020 23:53:38 -0500 Subject: [PATCH 10/23] Add some testing code for dparray but this isn't integrated with the test suite correctly yet. --- numba/dppl/tests/dppl/test_dparray.py | 93 +++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 numba/dppl/tests/dppl/test_dparray.py diff --git a/numba/dppl/tests/dppl/test_dparray.py b/numba/dppl/tests/dppl/test_dparray.py new file mode 100644 index 00000000000..1b49d10e019 --- /dev/null +++ b/numba/dppl/tests/dppl/test_dparray.py @@ -0,0 +1,93 @@ +from __future__ import print_function, division, absolute_import + +import numba +import numba.dppl.dparray as np +import numpy + +def p1(a): + return a * 2.0 + 13 + +f1 = numba.njit(p1) + +@numba.njit() +def f2(a): + return a + +@numba.njit() +def f3(a, b): + return a * np.ndarray(b.shape, b.dtype, b) + +@numba.njit() +def f4(): + return np.ones(10) + +def p5(a, b): + return a * b + +f5 = numba.njit(p5) + +@numba.njit() +def f6(a): + return a + 13 + +print("Testing Python Numpy") +z1 = numpy.ones(10) +z2 = p1(z1) +print("z2:", z2, type(z2)) +assert(isinstance(z2, numpy.ndarray)) + +print("Testing Numba Numpy") +z1 = numpy.ones(10) +z2 = f1(z1) +print("z2:", z2, type(z2)) +assert(isinstance(z2, numpy.ndarray)) + +print("Testing dparray ones") +a = np.ones(10) +print("a:", a, type(a)) +assert(isinstance(a, np.ndarray)) + +print("Testing dparray multiplication") +c = a * 5 +print("c", c, type(c)) +assert(isinstance(c, np.ndarray)) + +print("Testing Python dparray") +b = p1(c) +print("b:", b, type(b)) +assert(isinstance(b, np.ndarray)) + +print("Testing Numba dparray") +b = f1(c) +print("b:", b, type(b)) +assert(isinstance(b, np.ndarray)) + +print("Testing Numba dparray 2") +d = f2(a) +print("d:", 
d, type(d)) +assert(isinstance(b, np.ndarray)) + +print("Testing Numba dparray constructor from numpy.ndarray") +e = f3(a, z1) +print("e:", e, type(e)) +assert(isinstance(e, np.ndarray)) + +print("Testing Numba dparray functions") +f = f4() +print("f:", f, type(f)) +assert(isinstance(f, np.ndarray)) + +print("Testing Python mixing dparray and numpy.ndarray") +h = p5(a, z1) +print("h:", h, type(h)) +assert(isinstance(h, np.ndarray)) + +print("Testing Numba mixing dparray and numpy.ndarray") +h = f5(a, z1) +print("h:", h, type(h)) +assert(isinstance(h, np.ndarray)) + +print("Testing Numba mixing dparray and constant") +g = f6(a) +print("g:", g, type(g)) +assert(isinstance(g, np.ndarray)) From a2d6403042db71502151743fc81be4921a905563 Mon Sep 17 00:00:00 2001 From: "Todd A. Anderson" Date: Tue, 29 Sep 2020 16:59:19 -0500 Subject: [PATCH 11/23] Remove debug prints. --- numba/core/base.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/numba/core/base.py b/numba/core/base.py index ad27fd62a42..9514d55e10c 100644 --- a/numba/core/base.py +++ b/numba/core/base.py @@ -435,9 +435,7 @@ def get_external_function_type(self, fndesc): return fnty def declare_function(self, module, fndesc): - #print("base.py: declare_function", module, "\n\targs:", fndesc.args, "\n\trestype:", fndesc.restype, "\n\targtypes:", fndesc.argtypes, fndesc.mangled_name, fndesc.noalias) fnty = self.call_conv.get_function_type(fndesc.restype, fndesc.argtypes) - #print("fnty:", fnty) fn = module.get_or_insert_function(fnty, name=fndesc.mangled_name) self.call_conv.decorate_function(fn, fndesc.args, fndesc.argtypes, noalias=fndesc.noalias) if fndesc.inline: @@ -553,21 +551,14 @@ def get_function(self, fn, sig, _firstcall=True): The return value is a callable with the signature (builder, args). 
""" assert sig is not None -# print("get_function", fn, sig, type(fn), type(sig)) sig = sig.as_function() -# print("get_function", sig, type(sig)) if isinstance(fn, (types.Function, types.BoundFunction, types.Dispatcher)): key = fn.get_impl_key(sig) overloads = self._defns[key] -# print("function or boundfunction or dispatcher") else: key = fn overloads = self._defns[key] -# print("other") -# print("overloads", overloads) -# print_overloads(overloads.versions) -# print_defns(self._defns) try: return _wrap_impl(overloads.find(sig.args), self, sig) From 6f2bcf555d00f6f1a8855efd0af26f113f218e75 Mon Sep 17 00:00:00 2001 From: "Todd A. Anderson" Date: Tue, 29 Sep 2020 17:00:12 -0500 Subject: [PATCH 12/23] Move np.ones_like to decorator like empty_like and zeros_like. --- numba/core/typing/npydecl.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/numba/core/typing/npydecl.py b/numba/core/typing/npydecl.py index 9ce15d1a5ff..da7527bbb09 100644 --- a/numba/core/typing/npydecl.py +++ b/numba/core/typing/npydecl.py @@ -553,6 +553,7 @@ def typer(shape, dtype=None): @infer_global(np.empty_like) @infer_global(np.zeros_like) +@infer_global(np.ones_like) class NdConstructorLike(CallableTemplate): """ Typing template for np.empty_like(), .zeros_like(), .ones_like(). @@ -580,9 +581,6 @@ def typer(arg, dtype=None): return typer -infer_global(np.ones_like)(NdConstructorLike) - - @infer_global(np.full) class NdFull(CallableTemplate): @@ -599,6 +597,7 @@ def typer(shape, fill_value, dtype=None): return typer + @infer_global(np.full_like) class NdFullLike(CallableTemplate): From ea0747fbbb5ebda515a593ee9d9db5c3d9fad89c Mon Sep 17 00:00:00 2001 From: "Todd A. Anderson" Date: Tue, 29 Sep 2020 17:01:35 -0500 Subject: [PATCH 13/23] Define a module for the USM-allocator for dparray. 
--- numba/dppl/dppl_rt.c | 72 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 numba/dppl/dppl_rt.c diff --git a/numba/dppl/dppl_rt.c b/numba/dppl/dppl_rt.c new file mode 100644 index 00000000000..5d3305dc1ec --- /dev/null +++ b/numba/dppl/dppl_rt.c @@ -0,0 +1,72 @@ +#include "../_pymodule.h" +#include "../core/runtime/nrt_external.h" +#include "assert.h" +#include +#include + +NRT_ExternalAllocator dparray_allocator; + +void dparray_memsys_init() { + void *(*get_queue)(); + void *sycldl = dlopen("libDPPLSyclInterface.so", RTLD_NOW); + assert(sycldl != NULL); + dparray_allocator.malloc = (NRT_external_malloc_func)dlsym(sycldl, "DPPLmalloc_shared"); + dparray_allocator.realloc = NULL; + dparray_allocator.free = (NRT_external_free_func)dlsym(sycldl, "DPPLfree"); + get_queue = (void *(*))dlsym(sycldl, "DPPLGetCurrentQueue"); + dparray_allocator.opaque_data = get_queue(); +// printf("dparray_memsys_init: %p %p %p\n", dparray_allocator.malloc, dparray_allocator.free, dparray_allocator.opaque_data); +} + +void * dparray_get_ext_allocator() { + printf("dparray_get_ext_allocator %p\n", &dparray_allocator); + return (void*)&dparray_allocator; +} + +static PyObject * +get_external_allocator(PyObject *self, PyObject *args) { + return PyLong_FromVoidPtr(dparray_get_ext_allocator()); +} + +static PyMethodDef ext_methods[] = { +#define declmethod_noargs(func) { #func , ( PyCFunction )func , METH_NOARGS, NULL } + declmethod_noargs(get_external_allocator), + {NULL}, +#undef declmethod_noargs +}; + +static PyObject * +build_c_helpers_dict(void) +{ + PyObject *dct = PyDict_New(); + if (dct == NULL) + goto error; + +#define _declpointer(name, value) do { \ + PyObject *o = PyLong_FromVoidPtr(value); \ + if (o == NULL) goto error; \ + if (PyDict_SetItemString(dct, name, o)) { \ + Py_DECREF(o); \ + goto error; \ + } \ + Py_DECREF(o); \ +} while (0) + + _declpointer("dparray_get_ext_allocator", &dparray_get_ext_allocator); + +#undef 
_declpointer + return dct; +error: + Py_XDECREF(dct); + return NULL; +} + +MOD_INIT(_dppl_rt) { + PyObject *m; + MOD_DEF(m, "numba.dppl._dppl_rt", "No docs", ext_methods) + if (m == NULL) + return MOD_ERROR_VAL; + dparray_memsys_init(); + PyModule_AddObject(m, "c_helpers", build_c_helpers_dict()); + return MOD_SUCCESS_VAL(m); +} From 547ae8d399699be41f76a21ceb4223728e58bfbf Mon Sep 17 00:00:00 2001 From: "Todd A. Anderson" Date: Tue, 29 Sep 2020 17:03:40 -0500 Subject: [PATCH 14/23] Add an external_allocator options to the Numba runtime. --- numba/core/runtime/_nrt_python.c | 12 ++++++++++++ numba/core/runtime/_nrt_pythonmod.c | 1 + numba/core/runtime/nrt.c | 26 ++++++++++++++++++++------ numba/core/runtime/nrt.h | 11 +++++++++-- 4 files changed, 42 insertions(+), 8 deletions(-) diff --git a/numba/core/runtime/_nrt_python.c b/numba/core/runtime/_nrt_python.c index 8a891518571..e4e1bdaa0fd 100644 --- a/numba/core/runtime/_nrt_python.c +++ b/numba/core/runtime/_nrt_python.c @@ -108,6 +108,14 @@ MemInfo_get_refcount(MemInfoObject *self, void *closure) { return PyLong_FromSize_t(refct); } +static +PyObject* +MemInfo_get_external_allocator(MemInfoObject *self, void *closure) { + void *p = NRT_MemInfo_external_allocator(self->meminfo); + printf("MemInfo_get_external_allocator %p\n", p); + return PyLong_FromVoidPtr(p); +} + static void MemInfo_dealloc(MemInfoObject *self) { @@ -135,6 +143,10 @@ static PyGetSetDef MemInfo_getsets[] = { (getter)MemInfo_get_refcount, NULL, "Get the refcount", NULL}, + {"external_allocator", + (getter)MemInfo_get_external_allocator, NULL, + "Get the external allocator", + NULL}, {NULL} /* Sentinel */ }; diff --git a/numba/core/runtime/_nrt_pythonmod.c b/numba/core/runtime/_nrt_pythonmod.c index 31e1155fd9f..d1300ee8e9a 100644 --- a/numba/core/runtime/_nrt_pythonmod.c +++ b/numba/core/runtime/_nrt_pythonmod.c @@ -163,6 +163,7 @@ declmethod(MemInfo_alloc); declmethod(MemInfo_alloc_safe); declmethod(MemInfo_alloc_aligned); 
declmethod(MemInfo_alloc_safe_aligned); +declmethod(MemInfo_alloc_safe_aligned_external); declmethod(MemInfo_alloc_dtor_safe); declmethod(MemInfo_call_dtor); declmethod(MemInfo_new_varsize); diff --git a/numba/core/runtime/nrt.c b/numba/core/runtime/nrt.c index 9e2dc02dba4..33388c53106 100644 --- a/numba/core/runtime/nrt.c +++ b/numba/core/runtime/nrt.c @@ -180,6 +180,7 @@ void NRT_MemInfo_init(NRT_MemInfo *mi,void *data, size_t size, mi->data = data; mi->size = size; mi->external_allocator = external_allocator; + NRT_Debug(nrt_debug_print("NRT_MemInfo_init %p\n", external_allocator)); /* Update stats */ TheMSys.atomic_inc(&TheMSys.stats_mi_alloc); } @@ -187,7 +188,7 @@ void NRT_MemInfo_init(NRT_MemInfo *mi,void *data, size_t size, NRT_MemInfo *NRT_MemInfo_new(void *data, size_t size, NRT_dtor_function dtor, void *dtor_info) { - NRT_MemInfo *mi = NRT_Allocate(sizeof(NRT_MemInfo), NULL); + NRT_MemInfo *mi = NRT_Allocate(sizeof(NRT_MemInfo)); NRT_MemInfo_init(mi, data, size, dtor, dtor_info, NULL); return mi; } @@ -211,7 +212,8 @@ void nrt_internal_dtor_safe(void *ptr, size_t size, void *info) { static void *nrt_allocate_meminfo_and_data(size_t size, NRT_MemInfo **mi_out, NRT_ExternalAllocator *allocator) { NRT_MemInfo *mi; - char *base = NRT_Allocate(sizeof(NRT_MemInfo) + size, allocator); + NRT_Debug(nrt_debug_print("nrt_allocate_meminfo_and_data %p\n", allocator)); + char *base = NRT_Allocate_External(sizeof(NRT_MemInfo) + size, allocator); mi = (NRT_MemInfo *) base; *mi_out = mi; return base + sizeof(NRT_MemInfo); @@ -268,6 +270,7 @@ void *nrt_allocate_meminfo_and_data_align(size_t size, unsigned align, NRT_MemInfo **mi, NRT_ExternalAllocator *allocator) { size_t offset, intptr, remainder; + NRT_Debug(nrt_debug_print("nrt_allocate_meminfo_and_data_align %p\n", allocator)); char *base = nrt_allocate_meminfo_and_data(size + 2 * align, mi, allocator); intptr = (size_t) base; /* See if we are aligned */ @@ -302,7 +305,8 @@ NRT_MemInfo 
*NRT_MemInfo_alloc_safe_aligned(size_t size, unsigned align) { NRT_MemInfo *NRT_MemInfo_alloc_safe_aligned_external(size_t size, unsigned align, NRT_ExternalAllocator *allocator) { NRT_MemInfo *mi; - void *data = nrt_allocate_meminfo_and_data_align(size, align, &mi, NULL); + NRT_Debug(nrt_debug_print("NRT_MemInfo_alloc_safe_aligned_external %p\n", allocator)); + void *data = nrt_allocate_meminfo_and_data_align(size, align, &mi, allocator); /* Only fill up a couple cachelines with debug markers, to minimize overhead. */ memset(data, 0xCB, MIN(size, 256)); @@ -313,6 +317,7 @@ NRT_MemInfo *NRT_MemInfo_alloc_safe_aligned_external(size_t size, unsigned align } void NRT_dealloc(NRT_MemInfo *mi) { + NRT_Debug(nrt_debug_print("NRT_dealloc meminfo: %p external_allocator: %p\n", mi, mi->external_allocator)); if (mi->external_allocator) { mi->external_allocator->free(mi, mi->external_allocator->opaque_data); } else { @@ -359,6 +364,10 @@ size_t NRT_MemInfo_size(NRT_MemInfo* mi) { return mi->size; } +void * NRT_MemInfo_external_allocator(NRT_MemInfo *mi) { + NRT_Debug(nrt_debug_print("NRT_MemInfo_external_allocator meminfo: %p external_allocator: %p\n", mi, mi->external_allocator)); + return mi->external_allocator; +} void NRT_MemInfo_dump(NRT_MemInfo *mi, FILE *out) { fprintf(out, "MemInfo %p refcount %zu\n", mi, mi->refct); @@ -383,7 +392,7 @@ nrt_varsize_dtor(void *ptr, size_t size, void *info) { NRT_MemInfo *NRT_MemInfo_new_varsize(size_t size) { NRT_MemInfo *mi; - void *data = NRT_Allocate(size, NULL); + void *data = NRT_Allocate(size); if (data == NULL) return NULL; @@ -408,7 +417,7 @@ void *NRT_MemInfo_varsize_alloc(NRT_MemInfo *mi, size_t size) "with a non varsize-allocated meminfo"); return NULL; /* unreachable */ } - mi->data = NRT_Allocate(size, NULL); + mi->data = NRT_Allocate(size); if (mi->data == NULL) return NULL; mi->size = size; @@ -444,9 +453,14 @@ void NRT_MemInfo_varsize_free(NRT_MemInfo *mi, void *ptr) * Low-level allocation wrappers. 
*/ -void* NRT_Allocate(size_t size, NRT_ExternalAllocator *allocator) { +void* NRT_Allocate(size_t size) { + return NRT_Allocate_External(size, NULL); +} + +void* NRT_Allocate_External(size_t size, NRT_ExternalAllocator *allocator) { void *ptr; if (allocator) { + NRT_Debug(nrt_debug_print("NRT_Allocate custom allocator %zu\n", size)); ptr = allocator->malloc(size, allocator->opaque_data); } else { ptr = TheMSys.allocator.malloc(size); diff --git a/numba/core/runtime/nrt.h b/numba/core/runtime/nrt.h index 77d5d908b6f..530c6786731 100644 --- a/numba/core/runtime/nrt.h +++ b/numba/core/runtime/nrt.h @@ -15,7 +15,7 @@ All functions described here are threadsafe. /* Debugging facilities - enabled at compile-time */ /* #undef NDEBUG */ #if 0 -# define NRT_Debug(X) X +# define NRT_Debug(X) {X; fflush(stdout); } #else # define NRT_Debug(X) if (0) { X; } #endif @@ -184,6 +184,12 @@ void* NRT_MemInfo_data(NRT_MemInfo* mi); VISIBILITY_HIDDEN size_t NRT_MemInfo_size(NRT_MemInfo* mi); +/* + * Returns the external allocator + */ +VISIBILITY_HIDDEN +void* NRT_MemInfo_external_allocator(NRT_MemInfo* mi); + /* * NRT API for resizable buffers. @@ -211,7 +217,8 @@ void NRT_MemInfo_dump(NRT_MemInfo *mi, FILE *out); /* * Allocate memory of `size` bytes. */ -VISIBILITY_HIDDEN void* NRT_Allocate(size_t size, NRT_ExternalAllocator *allocator); +VISIBILITY_HIDDEN void* NRT_Allocate(size_t size); +VISIBILITY_HIDDEN void* NRT_Allocate_External(size_t size, NRT_ExternalAllocator *allocator); /* * Deallocate memory pointed by `ptr`. From d8fc51a0df088efdb0cb2863459f91452e9155dc Mon Sep 17 00:00:00 2001 From: "Todd A. Anderson" Date: Tue, 29 Sep 2020 17:05:17 -0500 Subject: [PATCH 15/23] Compile the new dppl_rt extension module. 
--- setup.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 46446291d7a..0b6c5b296e1 100644 --- a/setup.py +++ b/setup.py @@ -282,6 +282,12 @@ def check_file_at_path(path2file): 'numba/core/runtime/_nrt_python.c'], **np_compile_args) + ext_dppl = Extension(name='numba.dppl._dppl_rt', + sources=['numba/dppl/dppl_rt.c'], + depends=['numba/core/runtime/nrt_external.h', + 'numba/core/runtime/nrt.h'], + ) + ext_jitclass_box = Extension(name='numba.experimental.jitclass._box', sources=['numba/experimental/jitclass/_box.c'], depends=['numba/experimental/_pymodule.h'], @@ -294,7 +300,7 @@ def check_file_at_path(path2file): ext_modules = [ext_dynfunc, ext_dispatcher, ext_helperlib, ext_typeconv, ext_np_ufunc, ext_npyufunc_num_threads, ext_mviewbuf, - ext_nrt_python, ext_jitclass_box, ext_cuda_extras] + ext_nrt_python, ext_dppl, ext_jitclass_box, ext_cuda_extras] ext_modules += ext_np_ufunc_backends From d89b61337bc3436717d51cb12dc16c2f82bfd6d7 Mon Sep 17 00:00:00 2001 From: "Todd A. Anderson" Date: Tue, 29 Sep 2020 17:23:48 -0500 Subject: [PATCH 16/23] Most NumPy functions are now imported into dparray module and the typers and lowerers for those functions and types are working with Numba. from_ndarray and as_ndarray are also working. 
--- numba/dppl/dparray.py | 383 ++++++++++++++++++++++---- numba/dppl/tests/dppl/test_dparray.py | 151 +++++++--- 2 files changed, 440 insertions(+), 94 deletions(-) diff --git a/numba/dppl/dparray.py b/numba/dppl/dparray.py index cd6652c2fc4..ca5a16caf84 100644 --- a/numba/dppl/dparray.py +++ b/numba/dppl/dparray.py @@ -1,27 +1,52 @@ #from ._ndarray_utils import _transmogrify import numpy as np -from inspect import getmembers, isfunction, isclass +from inspect import getmembers, isfunction, isclass, isbuiltin from numbers import Number import numba +from types import FunctionType as ftype, BuiltinFunctionType as bftype from numba import types from numba.extending import typeof_impl, register_model, type_callable, lower_builtin from numba.np import numpy_support from numba.core.pythonapi import box, allocator from llvmlite import ir +import llvmlite.llvmpy.core as lc import llvmlite.binding as llb from numba.core import types, cgutils import builtins import sys from ctypes.util import find_library -import dppl -from dppl._memory import MemoryUSMShared +from numba.core.typing.npydecl import registry as typing_registry +from numba.core.imputils import builtin_registry as lower_registry +import importlib +import functools +import inspect +from numba.core.typing.templates import CallableTemplate +from numba.np.arrayobj import _array_copy + +debug = False + +def dprint(*args): + if debug: + print(*args) + sys.stdout.flush() flib = find_library('mkl_intel_ilp64') -print("flib:", flib) +dprint("flib:", flib) llb.load_library_permanently(flib) -functions_list = [o for o in getmembers(np) if isfunction(o[1])] +sycl_mem_lib = find_library('DPPLSyclInterface') +dprint("sycl_mem_lib:", sycl_mem_lib) +llb.load_library_permanently(sycl_mem_lib) + +import dppl +from dppl._memory import MemoryUSMShared +import numba.dppl._dppl_rt + +functions_list = [o[0] for o in getmembers(np) if isfunction(o[1]) or isbuiltin(o[1])] class_list = [o for o in getmembers(np) if isclass(o[1])] +# 
Register the helper function in dppl_rt so that we can insert calls to them via llvmlite. +for py_name, c_address in numba.dppl._dppl_rt.c_helpers.items(): + llb.add_symbol(py_name, c_address) class ndarray(np.ndarray): """ @@ -33,6 +58,7 @@ def __new__(subtype, shape, strides=None, order=None): # Create a new array. if buffer is None: + dprint("dparray::ndarray __new__ buffer None") nelems = np.prod(shape) dt = np.dtype(dtype) isz = dt.itemsize @@ -43,12 +69,15 @@ def __new__(subtype, shape, strides=strides, order=order) # zero copy if buffer is a usm backed array-like thing elif hasattr(buffer, '__sycl_usm_array_interface__'): + dprint("dparray::ndarray __new__ buffer __sycl_usm_array_interface__") # also check for array interface return np.ndarray.__new__( subtype, shape, dtype=dt, buffer=buffer, offset=offset, strides=strides, order=order) else: + dprint("dparray::ndarray __new__ buffer not None and not sycl_usm") + nelems = np.prod(shape) # must copy ar = np.ndarray(shape, dtype=dtype, buffer=buffer, @@ -63,6 +92,9 @@ def __new__(subtype, shape, return res def __array_finalize__(self, obj): + dprint("__array_finalize__:", obj, type(obj)) +# import pdb +# pdb.set_trace() # When called from the explicit constructor, obj is None if obj is None: return # When called in new-from-template, `obj` is another instance of our own @@ -71,6 +103,16 @@ def __array_finalize__(self, obj): # subclass of ndarray, including our own. 
if hasattr(obj, '__sycl_usm_array_interface__'): return + if isinstance(obj, numba.core.runtime._nrt_python._MemInfo): + dprint("array_finalize got Numba MemInfo") + ea = obj.external_allocator + d = obj.data + dprint("external_allocator:", ea, hex(ea), type(ea)) + dprint("data:", d, hex(d), type(d)) + dppl_rt_allocator = numba.dppl._dppl_rt.get_external_allocator() + dprint("dppl external_allocator:", dppl_rt_allocator, hex(dppl_rt_allocator), type(dppl_rt_allocator)) + if ea == dppl_rt_allocator: + return if isinstance(obj, np.ndarray): ob = self while isinstance(ob, np.ndarray): @@ -78,14 +120,6 @@ def __array_finalize__(self, obj): return ob = ob.base - # trace if self has underlying mkl_mem buffer -# ob = self.base - -# while isinstance(ob, ndarray): -# ob = ob.base -# if isinstance(ob, dppl.Memory): -# return - # Just raise an exception since __array_ufunc__ makes all reasonable cases not # need the code below. raise ValueError("Non-MKL allocated ndarray can not viewed as MKL-allocated one without a copy") @@ -111,13 +145,13 @@ def __array_finalize__(self, obj): _transmogrify(self, new_arr) """ + # Tell Numba to not treat this type just like a NumPy ndarray but to propagate its type. + # This way it will use the custom dparray allocator. __numba_no_subtype_ndarray__ = True - def from_ndarray(x): - return ndarray(x.shape, x.dtype, x) - + # Convert to a NumPy ndarray. 
def as_ndarray(self): - return np.ndarray(self.shape, self.dtype, self) + return np.copy(self) def __array__(self): return self @@ -126,14 +160,18 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): if method == '__call__': N = None scalars = [] + typing = [] for inp in inputs: if isinstance(inp, Number): scalars.append(inp) + typing.append(inp) elif isinstance(inp, (self.__class__, np.ndarray)): if isinstance(inp, self.__class__): scalars.append(np.ndarray(inp.shape, inp.dtype, inp)) + typing.append(np.ndarray(inp.shape, inp.dtype)) else: scalars.append(inp) + typing.append(inp) if N is not None: if N != inp.shape: raise TypeError("inconsistent sizes") @@ -141,42 +179,61 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): N = inp.shape else: return NotImplemented + # Have to avoid recursive calls to array_ufunc here. + # If no out kwarg then we create a dparray out so that we get + # USM memory. However, if kwarg has dparray-typed out then + # array_ufunc is called recursively so we cast out as regular + # NumPy ndarray (having a USM data pointer). if kwargs.get('out', None) is None: # maybe copy? # deal with multiple returned arrays, so kwargs['out'] can be tuple - kwargs['out'] = empty(inputs[0].shape, dtype=get_ret_type_from_ufunc(ufunc)) + res_type = np.result_type(*typing) + out = empty(inputs[0].shape, dtype=res_type) + out_as_np = np.ndarray(out.shape, out.dtype, out) + kwargs['out'] = out_as_np + else: + # If they manually gave dparray as out kwarg then we have to also + # cast as regular NumPy ndarray to avoid recursion. 
+ if isinstance(kwargs['out'], ndarray): + out = kwargs['out'] + kwargs['out'] = np.ndarray(out.shape, out.dtype, out) + else: + out = kwargs['out'] ret = ufunc(*scalars, **kwargs) - return ret -# return self.__class__(ret.shape, ret.dtype, ret) + return out else: return NotImplemented for c in class_list: cname = c[0] - new_func = "class %s(np.%s):\n" % (cname, cname) + # For now we do the simple thing and copy the types from NumPy module into dparray module. + new_func = "%s = np.%s" % (cname, cname) +# new_func = "class %s(np.%s):\n" % (cname, cname) if cname == "ndarray": # Implemented explicitly above. continue else: # This is temporary. - new_func += " pass\n" +# new_func += " pass\n" # The code below should eventually be made to work and used. # new_func += " @classmethod\n" # new_func += " def cast(cls, some_np_obj):\n" # new_func += " some_np_obj.__class__ = cls\n" # new_func += " return some_np_obj\n" + pass try: the_code = compile(new_func, '__init__', 'exec') exec(the_code) except: + print("Failed to exec class", cname) pass # Redefine all Numpy functions in this module and if they # return a Numpy array, transform that to a USM-backed array # instead. This is a stop-gap. We should eventually find a # way to do the allocation correct to start with. -for f in functions_list: - fname = f[0] +for fname in functions_list: +# print("Adding function", fname) new_func = "def %s(*args, **kwargs):\n" % fname new_func += " ret = np.%s(*args, **kwargs)\n" % fname new_func += " if type(ret) == np.ndarray:\n" @@ -224,6 +281,9 @@ def typeof_ta_ndarray(val, c): # object of type DPArray. register_model(DPArrayType)(numba.core.datamodel.models.ArrayModel) +""" +# This code should not work because you can't pass arbitrary buffer to dparray constructor. + # This tells Numba how to type calls to a DPArray constructor. 
@type_callable(ndarray) def type_ndarray(context): @@ -231,6 +291,15 @@ def typer(shape, ndim, buf): return DPArrayType(buf.dtype, buf.ndim, buf.layout) return typer +@overload(ndarray) +def overload_ndarray_constructor(shape, dtype, buf): + print("overload_ndarray_constructor:", shape, dtype, buf) + + def ndarray_impl(shape, dtype, buf): + pass + + return ndarray_impl + # This tells Numba how to implement calls to a DPArray constructor. @lower_builtin(ndarray, types.UniTuple, types.DType, types.Array) def impl_ndarray(context, builder, sig, args): @@ -238,6 +307,30 @@ def impl_ndarray(context, builder, sig, args): shape, ndim, buf = args return buf + context.nrt._require_nrt() + + mod = builder.module + u32 = ir.IntType(32) + + # Get the Numba external allocator for USM memory. + ext_allocator_fnty = ir.FunctionType(cgutils.voidptr_t, []) + ext_allocator_fn = mod.get_or_insert_function(ext_allocator_fnty, + name="dparray_get_ext_allocator") + ext_allocator = builder.call(ext_allocator_fn, []) + # Get the Numba function to allocate an aligned array with an external allocator. + fnty = ir.FunctionType(cgutils.voidptr_t, [cgutils.intp_t, u32, cgutils.voidptr_t]) + fn = mod.get_or_insert_function(fnty, + name="NRT_MemInfo_alloc_safe_aligned_external") + fn.return_value.add_attribute("noalias") + if isinstance(align, builtins.int): + align = context.get_constant(types.uint32, align) + else: + assert align.type == u32, "align must be a uint32" + newary = builder.call(fn, [size, align, ext_allocator]) + + return buf +""" + # This tells Numba how to convert from its native representation # of a DPArray in a njit function back to a Python DPArray. @box(DPArrayType) @@ -259,35 +352,215 @@ def box_array(typ, val, c): # DPArray in a njit function. 
@allocator(DPArrayType) def allocator_DPArray(context, builder, size, align): - print("allocator_DPArray") - sys.stdout.flush() - use_Numba_allocator = True - if use_Numba_allocator: - print("Using Numba allocator") - context.nrt._require_nrt() - - mod = builder.module - u32 = ir.IntType(32) - fnty = ir.FunctionType(cgutils.voidptr_t, [cgutils.intp_t, u32]) - fn = mod.get_or_insert_function(fnty, - name="NRT_MemInfo_alloc_safe_aligned") - fn.return_value.add_attribute("noalias") - if isinstance(align, builtins.int): - align = context.get_constant(types.uint32, align) - else: - assert align.type == u32, "align must be a uint32" - return builder.call(fn, [size, align]) + context.nrt._require_nrt() + + mod = builder.module + u32 = ir.IntType(32) + + # Get the Numba external allocator for USM memory. + ext_allocator_fnty = ir.FunctionType(cgutils.voidptr_t, []) + ext_allocator_fn = mod.get_or_insert_function(ext_allocator_fnty, + name="dparray_get_ext_allocator") + ext_allocator = builder.call(ext_allocator_fn, []) + # Get the Numba function to allocate an aligned array with an external allocator. 
+ fnty = ir.FunctionType(cgutils.voidptr_t, [cgutils.intp_t, u32, cgutils.voidptr_t]) + fn = mod.get_or_insert_function(fnty, + name="NRT_MemInfo_alloc_safe_aligned_external") + fn.return_value.add_attribute("noalias") + if isinstance(align, builtins.int): + align = context.get_constant(types.uint32, align) else: - print("Using mkl_malloc") - context.nrt._require_nrt() - - mod = builder.module - u32 = ir.IntType(32) - fnty = ir.FunctionType(cgutils.voidptr_t, [cgutils.intp_t, u32]) - fn = mod.get_or_insert_function(fnty, name="mkl_malloc") - fn.return_value.add_attribute("noalias") - if isinstance(align, builtins.int): - align = context.get_constant(types.uint32, align) - else: - assert align.type == u32, "align must be a uint32" - return builder.call(fn, [size, align]) + assert align.type == u32, "align must be a uint32" + return builder.call(fn, [size, align, ext_allocator]) + + +def numba_register(): + numba_register_typing() + numba_register_lower_builtin() + +# Copy a function registered as a lowerer in Numba but change the +# "np" import in Numba to point to dparray instead of NumPy. +def copy_func_for_dparray(f, dparray_mod): + import copy as cc + # Make a copy so our change below doesn't affect anything else. + gglobals = cc.copy(f.__globals__) + # Make the "np"'s in the code use dparray instead of Numba's default NumPy. + gglobals['np'] = dparray_mod + # Create a new function using the original code but the new globals. + g = ftype(f.__code__, gglobals, None, f.__defaults__, f.__closure__) + # Some other tricks to make sure the function copy works. + g = functools.update_wrapper(g, f) + g.__kwdefaults__ = f.__kwdefaults__ + return g + +def numba_register_lower_builtin(): + todo = [] + todo_builtin = [] + + # For all Numpy identifiers that have been registered for typing in Numba... + # this registry contains functions, getattrs, setattrs, casts and constants...need to do them all? 
FIX FIX FIX + for ig in lower_registry.functions: +# print("ig:", ig, type(ig), len(ig)) + impl, func, types = ig +# print("register lower_builtin:", impl, type(impl), func, type(func), types, type(types)) + # If it is a Numpy function... + if isinstance(func, ftype): +# print("isfunction:", func.__module__, type(func.__module__)) + if func.__module__ == np.__name__: +# print("name:", func.__name__) + # If we have overloaded that function in the dparray module (always True right now)... + if func.__name__ in functions_list: + todo.append(ig) + if isinstance(func, bftype): +# print("isbuiltinfunction:", func.__module__, type(func.__module__)) + if func.__module__ == np.__name__: +# print("name:", func.__name__) + # If we have overloaded that function in the dparray module (always True right now)... + if func.__name__ in functions_list: + todo.append(ig) +# print("todo_builtin added:", func.__name__) + + cur_mod = importlib.import_module(__name__) + for impl, func, types in (todo+todo_builtin): + dparray_func = eval(func.__name__) +# print("need to re-register lowerer for dparray", impl, func, types, dparray_func) + new_impl = copy_func_for_dparray(impl, cur_mod) +# lower_registry.functions.append((impl, dparray_func, types)) + lower_registry.functions.append((new_impl, dparray_func, types)) + +def argspec_to_string(argspec): + first_default_arg = len(argspec.args)-len(argspec.defaults) + non_def = argspec.args[:first_default_arg] + arg_zip = list(zip(argspec.args[first_default_arg:], argspec.defaults)) + combined = [a+"="+str(b) for a,b in arg_zip] + return ",".join(non_def + combined) + +def numba_register_typing(): + todo = [] + todo_classes = [] + + # For all Numpy identifiers that have been registered for typing in Numba... + for ig in typing_registry.globals: + val, typ = ig +# print("global typing:", val, type(val), typ, type(typ)) + # If it is a Numpy function... 
+ if isinstance(val, (ftype, bftype)): +# print("name:", val.__name__, val.__name__ in functions_list) + # If we have overloaded that function in the dparray module (always True right now)... + if val.__name__ in functions_list: + todo.append(ig) + if isinstance(val, type): + todo_classes.append(ig) + + for val, typ in todo_classes: +# print("todo_classes:", val, type(val), typ, type(typ)) +# assert len(typ.templates) == 1 + dpval = eval(val.__name__) + + for val, typ in todo: + assert len(typ.templates) == 1 + # template is the typing class to invoke generic() upon. + template = typ.templates[0] + dpval = eval(val.__name__) + if debug: + print("--------------------------------------------------------------") + print("need to re-register for dparray", val, typ, typ.typing_key) + print("val:", val, type(val), "dir val", dir(val)) + print("typ:", typ, type(typ), "dir typ", dir(typ)) + print("typing key:", typ.typing_key) + print("name:", typ.name) + print("key:", typ.key) + print("templates:", typ.templates) + print("template:", template, type(template)) + print("dpval:", dpval, type(dpval)) + print("--------------------------------------------------------------") + + class_name = "DparrayTemplate_" + val.__name__ + + @classmethod + def set_key_original(cls, key, original): + cls.key = key + cls.original = original + + def generic_impl(self): +# print("generic_impl", self.__class__.key, self.__class__.original) + original_typer = self.__class__.original.generic(self.__class__.original) + #print("original_typer:", original_typer, type(original_typer), self.__class__) + ot_argspec = inspect.getfullargspec(original_typer) + #print("ot_argspec:", ot_argspec) + astr = argspec_to_string(ot_argspec) + #print("astr:", astr) + + typer_func = """def typer({}): + original_res = original_typer({}) + #print("original_res:", original_res) + if isinstance(original_res, types.Array): + return DPArrayType(dtype=original_res.dtype, ndim=original_res.ndim, layout=original_res.layout) + 
+ return original_res""".format(astr, ",".join(ot_argspec.args)) + + #print("typer_func:", typer_func) + + try: + gs = globals() + ls = locals() + gs["original_typer"] = ls["original_typer"] + exec(typer_func, globals(), locals()) + except NameError as ne: + print("NameError in exec:", ne) + sys.exit(0) + except: + print("exec failed!", sys.exc_info()[0]) + sys.exit(0) + + try: + exec_res = eval("typer") + except NameError as ne: + print("NameError in eval:", ne) + sys.exit(0) + except: + print("eval failed!", sys.exc_info()[0]) + sys.exit(0) + + #print("exec_res:", exec_res) + return exec_res + + new_dparray_template = type(class_name, (template,), { + "set_class_vars":set_key_original, + "generic":generic_impl}) + + new_dparray_template.set_class_vars(dpval, template) + + assert(callable(dpval)) + type_handler = types.Function(new_dparray_template) + typing_registry.register_global(dpval, type_handler) + +def from_ndarray(x): + return copy(x) + +def as_ndarray(x): + return np.copy(x) + +@typing_registry.register_global(as_ndarray) +class DparrayAsNdarray(CallableTemplate): + def generic(self): + def typer(arg): + return types.Array(dtype=arg.dtype, ndim=arg.ndim, layout=arg.layout) + + return typer + +@typing_registry.register_global(from_ndarray) +class DparrayFromNdarray(CallableTemplate): + def generic(self): + def typer(arg): + return DPArrayType(dtype=arg.dtype, ndim=arg.ndim, layout=arg.layout) + + return typer + +@lower_registry.lower(as_ndarray, DPArrayType) +def dparray_conversion_as(context, builder, sig, args): + return _array_copy(context, builder, sig, args) + +@lower_registry.lower(from_ndarray, types.Array) +def dparray_conversion_from(context, builder, sig, args): + return _array_copy(context, builder, sig, args) diff --git a/numba/dppl/tests/dppl/test_dparray.py b/numba/dppl/tests/dppl/test_dparray.py index 1b49d10e019..9407eef6f66 100644 --- a/numba/dppl/tests/dppl/test_dparray.py +++ b/numba/dppl/tests/dppl/test_dparray.py @@ -1,8 +1,11 @@ from 
__future__ import print_function, division, absolute_import import numba -import numba.dppl.dparray as np +import numba.dppl.dparray as dparray import numpy +import sys + +dparray.numba_register() # HACK to get the timing right for now def p1(a): return a * 2.0 + 13 @@ -14,14 +17,14 @@ def f2(a): return a @numba.njit() -def f3(a, b): - return a * np.ndarray(b.shape, b.dtype, b) +def f3(a, b): # a is dparray, b is numpy + return a * dparray.asarray(b) @numba.njit() def f4(): - return np.ones(10) + return dparray.ones(10) -def p5(a, b): +def p5(a, b): # a is dparray, b is numpy return a * b f5 = numba.njit(p5) @@ -30,64 +33,134 @@ def p5(a, b): def f6(a): return a + 13 -print("Testing Python Numpy") +@numba.njit() +def f7(a): # a is dparray + # implicit conversion of a to numpy.ndarray + b = numpy.ones(10) + c = a * b + d = a.argsort() # with no implicit conversion this fails + +@numba.njit +def f8(a): + return dparray.as_ndarray(a) + +@numba.njit +def f9(a): + return dparray.from_ndarray(a) + +print("------------------- Testing Python Numpy") +sys.stdout.flush() z1 = numpy.ones(10) z2 = p1(z1) print("z2:", z2, type(z2)) -assert(isinstance(z2, numpy.ndarray)) +assert(type(z2) == numpy.ndarray) -print("Testing Numba Numpy") +print("------------------- Testing Numba Numpy") +sys.stdout.flush() z1 = numpy.ones(10) z2 = f1(z1) print("z2:", z2, type(z2)) -assert(isinstance(z2, numpy.ndarray)) +assert(type(z2) == numpy.ndarray) -print("Testing dparray ones") -a = np.ones(10) +print("------------------- Testing dparray ones") +sys.stdout.flush() +a = dparray.ones(10) print("a:", a, type(a)) -assert(isinstance(a, np.ndarray)) - -print("Testing dparray multiplication") +assert(isinstance(a, dparray.ndarray)) + +print("------------------- Testing dparray.dparray.as_ndarray") +sys.stdout.flush() +nd1 = a.as_ndarray() +print("nd1:", nd1, type(nd1)) +assert(type(nd1) == numpy.ndarray) + +print("------------------- Testing dparray.as_ndarray") +sys.stdout.flush() +nd2 = 
dparray.as_ndarray(a) +print("nd2:", nd2, type(nd2)) +assert(type(nd2) == numpy.ndarray) + +print("------------------- Testing dparray.from_ndarray") +sys.stdout.flush() +dp1 = dparray.from_ndarray(nd2) +print("dp1:", dp1, type(dp1)) +assert(isinstance(dp1, dparray.ndarray)) + +print("------------------- Testing dparray multiplication") +sys.stdout.flush() c = a * 5 print("c", c, type(c)) -assert(isinstance(c, np.ndarray)) +assert(isinstance(c, dparray.ndarray)) -print("Testing Python dparray") +print("------------------- Testing Python dparray") +sys.stdout.flush() b = p1(c) print("b:", b, type(b)) -assert(isinstance(b, np.ndarray)) +assert(isinstance(b, dparray.ndarray)) +del b -print("Testing Numba dparray") -b = f1(c) -print("b:", b, type(b)) -assert(isinstance(b, np.ndarray)) +print("------------------- Testing Python mixing dparray and numpy.ndarray") +sys.stdout.flush() +h = p5(a, z1) +print("h:", h, type(h)) +assert(isinstance(h, dparray.ndarray)) +del h -print("Testing Numba dparray 2") +print("------------------- Testing Numba dparray 2") +sys.stdout.flush() d = f2(a) print("d:", d, type(d)) -assert(isinstance(b, np.ndarray)) +assert(isinstance(d, dparray.ndarray)) +del d + +print("------------------- Testing Numba dparray") +sys.stdout.flush() +b = f1(c) +print("b:", b, type(b)) +assert(isinstance(b, dparray.ndarray)) +del b -print("Testing Numba dparray constructor from numpy.ndarray") +""" +print("------------------- Testing Numba dparray constructor from numpy.ndarray") +sys.stdout.flush() e = f3(a, z1) print("e:", e, type(e)) -assert(isinstance(e, np.ndarray)) +assert(isinstance(e, dparray.ndarray)) +""" -print("Testing Numba dparray functions") +print("------------------- Testing Numba mixing dparray and constant") +sys.stdout.flush() +g = f6(a) +print("g:", g, type(g)) +assert(isinstance(g, dparray.ndarray)) +del g + +print("------------------- Testing Numba mixing dparray and numpy.ndarray") +sys.stdout.flush() +h = f5(a, z1) +print("h:", h, 
type(h)) +assert(isinstance(h, dparray.ndarray)) +del h + +print("------------------- Testing Numba dparray functions") +sys.stdout.flush() f = f4() print("f:", f, type(f)) -assert(isinstance(f, np.ndarray)) +assert(isinstance(f, dparray.ndarray)) -print("Testing Python mixing dparray and numpy.ndarray") -h = p5(a, z1) -print("h:", h, type(h)) -assert(isinstance(h, np.ndarray)) +print("------------------- Testing Numba dparray.as_ndarray") +sys.stdout.flush() +nd3 = f8(a) +print("nd3:", nd3, type(nd3)) +assert(type(nd3) == numpy.ndarray) -print("Testing Numba mixing dparray and numpy.ndarray") -h = f5(a, z1) -print("h:", h, type(h)) -assert(isinstance(h, np.ndarray)) +print("------------------- Testing Numba dparray.from_ndarray") +sys.stdout.flush() +dp2 = f9(nd3) +print("dp2:", dp2, type(dp2)) +assert(isinstance(dp2, dparray.ndarray)) -print("Testing Numba mixing dparray and constant") -g = f6(a) -print("g:", g, type(g)) -assert(isinstance(g, np.ndarray)) +#------------------------------- +del a + +print("SUCCESS") From 55e61c3abcffa62496851c63fdf6e1a97dad739e Mon Sep 17 00:00:00 2001 From: "Todd A. Anderson" Date: Wed, 30 Sep 2020 15:43:46 -0500 Subject: [PATCH 17/23] Fix NRT_Allocate signature issue. 
--- numba/core/runtime/nrt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/numba/core/runtime/nrt.c b/numba/core/runtime/nrt.c index 7c5e6d8c0cd..33388c53106 100644 --- a/numba/core/runtime/nrt.c +++ b/numba/core/runtime/nrt.c @@ -392,7 +392,7 @@ nrt_varsize_dtor(void *ptr, size_t size, void *info) { NRT_MemInfo *NRT_MemInfo_new_varsize(size_t size) { NRT_MemInfo *mi; - void *data = NRT_Allocate(size, NULL); + void *data = NRT_Allocate(size); if (data == NULL) return NULL; @@ -417,7 +417,7 @@ void *NRT_MemInfo_varsize_alloc(NRT_MemInfo *mi, size_t size) "with a non varsize-allocated meminfo"); return NULL; /* unreachable */ } - mi->data = NRT_Allocate(size, NULL); + mi->data = NRT_Allocate(size); if (mi->data == NULL) return NULL; mi->size = size; From 3cbf1c68215386161fd02edf06b9dff41ddfa369 Mon Sep 17 00:00:00 2001 From: "Todd A. Anderson" Date: Wed, 30 Sep 2020 15:45:30 -0500 Subject: [PATCH 18/23] Switch to dpctl. --- numba/dppl/compiler.py | 2 +- numba/dppl/device_init.py | 2 +- numba/dppl/dppl_host_fn_call_gen.py | 2 +- numba/dppl/dppl_lowerer.py | 2 +- numba/dppl/dppl_rt.c | 25 +++++++++++++++++++++---- numba/dppl_config.py | 2 +- 6 files changed, 26 insertions(+), 9 deletions(-) diff --git a/numba/dppl/compiler.py b/numba/dppl/compiler.py index a507a1627a4..e0f83ecf1e4 100644 --- a/numba/dppl/compiler.py +++ b/numba/dppl/compiler.py @@ -10,7 +10,7 @@ from types import FunctionType from inspect import signature -import dppl.ocldrv as driver +import dpctl.ocldrv as driver from . import spirv_generator import os diff --git a/numba/dppl/device_init.py b/numba/dppl/device_init.py index 277f900776f..d5f461654c8 100644 --- a/numba/dppl/device_init.py +++ b/numba/dppl/device_init.py @@ -23,7 +23,7 @@ from . import initialize from .decorators import kernel, func, autojit -from dppl.ocldrv import runtime +from dpctl.ocldrv import runtime from . 
import target diff --git a/numba/dppl/dppl_host_fn_call_gen.py b/numba/dppl/dppl_host_fn_call_gen.py index f56002e0e2d..91f99f736e9 100644 --- a/numba/dppl/dppl_host_fn_call_gen.py +++ b/numba/dppl/dppl_host_fn_call_gen.py @@ -1,6 +1,6 @@ from __future__ import print_function, division, absolute_import -import dppl.ocldrv as driver +import dpctl.ocldrv as driver import llvmlite.llvmpy.core as lc import llvmlite.ir.values as liv import llvmlite.ir as lir diff --git a/numba/dppl/dppl_lowerer.py b/numba/dppl/dppl_lowerer.py index a799ed88895..2322bdffecd 100644 --- a/numba/dppl/dppl_lowerer.py +++ b/numba/dppl/dppl_lowerer.py @@ -40,7 +40,7 @@ from .target import SPIR_GENERIC_ADDRSPACE from .dufunc_inliner import dufunc_inliner from . import dppl_host_fn_call_gen as dppl_call_gen -import dppl.ocldrv as driver +import dpctl.ocldrv as driver from numba.dppl.target import DPPLTargetContext diff --git a/numba/dppl/dppl_rt.c b/numba/dppl/dppl_rt.c index 5d3305dc1ec..dde583a4c4c 100644 --- a/numba/dppl/dppl_rt.c +++ b/numba/dppl/dppl_rt.c @@ -8,12 +8,29 @@ NRT_ExternalAllocator dparray_allocator; void dparray_memsys_init() { void *(*get_queue)(); - void *sycldl = dlopen("libDPPLSyclInterface.so", RTLD_NOW); + char *lib_name = "libDPPLSyclInterface.so"; + char *malloc_name = "DPPLmalloc_shared"; + char *free_name = "DPPLfree_with_queue"; + char *get_queue_name = "DPPLQueueMgr_GetCurrentQueue"; + + void *sycldl = dlopen(lib_name, RTLD_NOW); assert(sycldl != NULL); - dparray_allocator.malloc = (NRT_external_malloc_func)dlsym(sycldl, "DPPLmalloc_shared"); + dparray_allocator.malloc = (NRT_external_malloc_func)dlsym(sycldl, malloc_name); + if (dparray_allocator.malloc == NULL) { + printf("Did not find %s in %s\n", malloc_name, lib_name); + exit(-1); + } dparray_allocator.realloc = NULL; - dparray_allocator.free = (NRT_external_free_func)dlsym(sycldl, "DPPLfree"); - get_queue = (void *(*))dlsym(sycldl, "DPPLGetCurrentQueue"); + dparray_allocator.free = 
(NRT_external_free_func)dlsym(sycldl, free_name); + if (dparray_allocator.free == NULL) { + printf("Did not find %s in %s\n", free_name, lib_name); + exit(-1); + } + get_queue = (void *(*))dlsym(sycldl, get_queue_name); + if (get_queue == NULL) { + printf("Did not find %s in %s\n", get_queue_name, lib_name); + exit(-1); + } dparray_allocator.opaque_data = get_queue(); // printf("dparray_memsys_init: %p %p %p\n", dparray_allocator.malloc, dparray_allocator.free, dparray_allocator.opaque_data); } diff --git a/numba/dppl_config.py b/numba/dppl_config.py index ae9bfd0e4ec..570cc7e83ca 100644 --- a/numba/dppl_config.py +++ b/numba/dppl_config.py @@ -1,7 +1,7 @@ dppl_present = False try: - from dppl.ocldrv import * + from dpctl.ocldrv import * except: pass else: From 699a8bf811268e744fd954a179ee20a100b6f256 Mon Sep 17 00:00:00 2001 From: "Todd A. Anderson" Date: Wed, 30 Sep 2020 15:47:25 -0500 Subject: [PATCH 19/23] Switch to dpctl. --- numba/dppl/dparray.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/numba/dppl/dparray.py b/numba/dppl/dparray.py index ca5a16caf84..51e36fb8f80 100644 --- a/numba/dppl/dparray.py +++ b/numba/dppl/dparray.py @@ -38,8 +38,8 @@ def dprint(*args): dprint("sycl_mem_lib:", sycl_mem_lib) llb.load_library_permanently(sycl_mem_lib) -import dppl -from dppl._memory import MemoryUSMShared +import dpctl +from dpctl._memory import MemoryUSMShared import numba.dppl._dppl_rt functions_list = [o[0] for o in getmembers(np) if isfunction(o[1]) or isbuiltin(o[1])] @@ -126,7 +126,7 @@ def __array_finalize__(self, obj): """ # since dparray must have mkl_memory underlying it, a copy must be made - newbuf = dppl.Memory(nbytes=self.data.nbytes) + newbuf = dpctl.Memory(nbytes=self.data.nbytes) new_arr = np.ndarray.__new__( type(self), self.shape, @@ -452,10 +452,11 @@ def numba_register_typing(): if isinstance(val, type): todo_classes.append(ig) - for val, typ in todo_classes: + # This is actuallya no-op now. 
+# for val, typ in todo_classes: # print("todo_classes:", val, type(val), typ, type(typ)) # assert len(typ.templates) == 1 - dpval = eval(val.__name__) +# dpval = eval(val.__name__) for val, typ in todo: assert len(typ.templates) == 1 From aa945b999a11a1da5c5929664d1255616da9135c Mon Sep 17 00:00:00 2001 From: "Todd A. Anderson" Date: Wed, 30 Sep 2020 16:26:29 -0500 Subject: [PATCH 20/23] Register dparray typing and lowering functions in a better spot so no need for applications to call the hacked registration function any more. --- numba/core/cpu.py | 2 ++ numba/dppl/dparray.py | 13 ++++++++++--- numba/dppl/tests/dppl/test_dparray.py | 2 -- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/numba/core/cpu.py b/numba/core/cpu.py index 150a1ebf930..0640c4b0e60 100644 --- a/numba/core/cpu.py +++ b/numba/core/cpu.py @@ -66,6 +66,7 @@ def load_additional_registries(self): from numba.np import npyimpl from numba.cpython import cmathimpl, mathimpl, printimpl, randomimpl from numba.misc import cffiimpl + from numba.dppl.dparray import numba_register as dparray_register self.install_registry(cmathimpl.registry) self.install_registry(cffiimpl.registry) self.install_registry(mathimpl.registry) @@ -75,6 +76,7 @@ def load_additional_registries(self): # load 3rd party extensions numba.core.entrypoints.init_all() + dparray_register() @property def target_data(self): diff --git a/numba/dppl/dparray.py b/numba/dppl/dparray.py index 51e36fb8f80..47a0b46b71e 100644 --- a/numba/dppl/dparray.py +++ b/numba/dppl/dparray.py @@ -373,10 +373,14 @@ def allocator_DPArray(context, builder, size, align): assert align.type == u32, "align must be a uint32" return builder.call(fn, [size, align, ext_allocator]) +registered = False def numba_register(): - numba_register_typing() - numba_register_lower_builtin() + global registered + if not registered: + registered = True + numba_register_typing() + numba_register_lower_builtin() # Copy a function registered as a lowerer in Numba but 
change the # "np" import in Numba to point to dparray instead of NumPy. @@ -423,7 +427,7 @@ def numba_register_lower_builtin(): cur_mod = importlib.import_module(__name__) for impl, func, types in (todo+todo_builtin): dparray_func = eval(func.__name__) -# print("need to re-register lowerer for dparray", impl, func, types, dparray_func) + dprint("need to re-register lowerer for dparray", impl, func, types, dparray_func) new_impl = copy_func_for_dparray(impl, cur_mod) # lower_registry.functions.append((impl, dparray_func, types)) lower_registry.functions.append((new_impl, dparray_func, types)) @@ -463,6 +467,8 @@ def numba_register_typing(): # template is the typing class to invoke generic() upon. template = typ.templates[0] dpval = eval(val.__name__) + dprint("need to re-register for dparray", val, typ, typ.typing_key) + """ if debug: print("--------------------------------------------------------------") print("need to re-register for dparray", val, typ, typ.typing_key) @@ -475,6 +481,7 @@ def numba_register_typing(): print("template:", template, type(template)) print("dpval:", dpval, type(dpval)) print("--------------------------------------------------------------") + """ class_name = "DparrayTemplate_" + val.__name__ diff --git a/numba/dppl/tests/dppl/test_dparray.py b/numba/dppl/tests/dppl/test_dparray.py index 9407eef6f66..af18bb3fc5a 100644 --- a/numba/dppl/tests/dppl/test_dparray.py +++ b/numba/dppl/tests/dppl/test_dparray.py @@ -5,8 +5,6 @@ import numpy import sys -dparray.numba_register() # HACK to get the timing right for now - def p1(a): return a * 2.0 + 13 From 107f9c3792c808bc181248dcfc63400b0675608b Mon Sep 17 00:00:00 2001 From: "Todd A. Anderson" Date: Wed, 30 Sep 2020 19:01:36 -0500 Subject: [PATCH 21/23] Give dparray's an empty dict for __array_interface. We can gradually fill this in with correct values. How to synch the dict between dparray and dpnp. I guess that problem goes away when they merge. Adding a test for dparray.empty() in Numba. 
Asserting that each dparray has an __array_interface__ attribute. --- numba/dppl/dparray.py | 42 ++++++++++++++++++++------- numba/dppl/tests/dppl/test_dparray.py | 22 ++++++++++++++ 2 files changed, 54 insertions(+), 10 deletions(-) diff --git a/numba/dppl/dparray.py b/numba/dppl/dparray.py index 47a0b46b71e..55dc2c7374b 100644 --- a/numba/dppl/dparray.py +++ b/numba/dppl/dparray.py @@ -48,6 +48,10 @@ def dprint(*args): for py_name, c_address in numba.dppl._dppl_rt.c_helpers.items(): llb.add_symbol(py_name, c_address) +array_interface_property = "__array_interface__" +def has_array_interface(x): + return hasattr(x, array_interface_property) + class ndarray(np.ndarray): """ numpy.ndarray subclass whose underlying memory buffer is allocated @@ -63,18 +67,30 @@ def __new__(subtype, shape, dt = np.dtype(dtype) isz = dt.itemsize buf = MemoryUSMShared(nbytes=isz*max(1,nelems)) - return np.ndarray.__new__( + new_obj = np.ndarray.__new__( subtype, shape, dtype=dt, buffer=buf, offset=0, strides=strides, order=order) + if hasattr(new_obj, array_interface_property): + dprint("buffer None new_obj already has sycl_usm") + else: + dprint("buffer None new_obj will add sycl_usm") + new_obj.__sycl_usm_array_interface__ = {} + return new_obj # zero copy if buffer is a usm backed array-like thing - elif hasattr(buffer, '__sycl_usm_array_interface__'): - dprint("dparray::ndarray __new__ buffer __sycl_usm_array_interface__") + elif hasattr(buffer, array_interface_property): + dprint("dparray::ndarray __new__ buffer", array_interface_property) # also check for array interface - return np.ndarray.__new__( - subtype, shape, dtype=dt, + new_obj = np.ndarray.__new__( + subtype, shape, dtype=dtype, buffer=buffer, offset=offset, strides=strides, order=order) + if hasattr(new_obj, array_interface_property): + dprint("buffer None new_obj already has sycl_usm") + else: + dprint("buffer None new_obj will add sycl_usm") + new_obj.__sycl_usm_array_interface__ = {} + return new_obj else:
dprint("dparray::ndarray __new__ buffer not None and not sycl_usm") nelems = np.prod(shape) @@ -84,12 +100,17 @@ def __new__(subtype, shape, offset=offset, strides=strides, order=order) buf = MemoryUSMShared(nbytes=ar.nbytes) - res = np.ndarray.__new__( + new_obj = np.ndarray.__new__( subtype, shape, dtype=dtype, buffer=buf, offset=0, strides=strides, order=order) - np.copyto(res, ar, casting='no') - return res + np.copyto(new_obj, ar, casting='no') + if hasattr(new_obj, array_interface_property): + dprint("buffer None new_obj already has sycl_usm") + else: + dprint("buffer None new_obj will add sycl_usm") + new_obj.__sycl_usm_array_interface__ = {} + return new_obj def __array_finalize__(self, obj): dprint("__array_finalize__:", obj, type(obj)) @@ -101,7 +122,7 @@ def __array_finalize__(self, obj): # subclass, that we might use to update the new `self` instance. # However, when called from view casting, `obj` can be an instance of any # subclass of ndarray, including our own. - if hasattr(obj, '__sycl_usm_array_interface__'): + if hasattr(obj, array_interface_property): return if isinstance(obj, numba.core.runtime._nrt_python._MemInfo): dprint("array_finalize got Numba MemInfo") @@ -116,7 +137,7 @@ def __array_finalize__(self, obj): if isinstance(obj, np.ndarray): ob = self while isinstance(ob, np.ndarray): - if hasattr(obj, '__sycl_usm_array_interface__'): + if hasattr(obj, array_interface_property): return ob = ob.base @@ -342,6 +363,7 @@ def box_array(typ, val, c): dtypeptr = c.env_manager.read_const(c.env_manager.add_const(np_dtype)) # Steals NRT ref newary = c.pyapi.nrt_adapt_ndarray_to_python(typ, val, dtypeptr) + print("box_array:", c, type(c), c.pyapi, type(c.pyapi)) return newary else: parent = nativeary.parent diff --git a/numba/dppl/tests/dppl/test_dparray.py b/numba/dppl/tests/dppl/test_dparray.py index af18bb3fc5a..1578d4c8612 100644 --- a/numba/dppl/tests/dppl/test_dparray.py +++ b/numba/dppl/tests/dppl/test_dparray.py @@ -46,6 +46,10 @@ def f8(a): 
def f9(a): return dparray.from_ndarray(a) +@numba.njit +def f10(): + return dparray.empty((10,10)) + print("------------------- Testing Python Numpy") sys.stdout.flush() z1 = numpy.ones(10) @@ -65,6 +69,7 @@ def f9(a): a = dparray.ones(10) print("a:", a, type(a)) assert(isinstance(a, dparray.ndarray)) +assert(dparray.has_array_interface(a)) print("------------------- Testing dparray.dparray.as_ndarray") sys.stdout.flush() @@ -83,18 +88,21 @@ def f9(a): dp1 = dparray.from_ndarray(nd2) print("dp1:", dp1, type(dp1)) assert(isinstance(dp1, dparray.ndarray)) +assert(dparray.has_array_interface(dp1)) print("------------------- Testing dparray multiplication") sys.stdout.flush() c = a * 5 print("c", c, type(c)) assert(isinstance(c, dparray.ndarray)) +assert(dparray.has_array_interface(c)) print("------------------- Testing Python dparray") sys.stdout.flush() b = p1(c) print("b:", b, type(b)) assert(isinstance(b, dparray.ndarray)) +assert(dparray.has_array_interface(b)) del b print("------------------- Testing Python mixing dparray and numpy.ndarray") @@ -102,6 +110,7 @@ def f9(a): h = p5(a, z1) print("h:", h, type(h)) assert(isinstance(h, dparray.ndarray)) +assert(dparray.has_array_interface(h)) del h print("------------------- Testing Numba dparray 2") @@ -109,6 +118,7 @@ def f9(a): d = f2(a) print("d:", d, type(d)) assert(isinstance(d, dparray.ndarray)) +assert(dparray.has_array_interface(d)) del d print("------------------- Testing Numba dparray") @@ -116,6 +126,7 @@ def f9(a): b = f1(c) print("b:", b, type(b)) assert(isinstance(b, dparray.ndarray)) +assert(dparray.has_array_interface(b)) del b """ @@ -131,6 +142,7 @@ def f9(a): g = f6(a) print("g:", g, type(g)) assert(isinstance(g, dparray.ndarray)) +assert(dparray.has_array_interface(g)) del g print("------------------- Testing Numba mixing dparray and numpy.ndarray") @@ -138,6 +150,7 @@ def f9(a): h = f5(a, z1) print("h:", h, type(h)) assert(isinstance(h, dparray.ndarray)) +assert(dparray.has_array_interface(h)) del 
h print("------------------- Testing Numba dparray functions") @@ -145,6 +158,7 @@ def f9(a): f = f4() print("f:", f, type(f)) assert(isinstance(f, dparray.ndarray)) +assert(dparray.has_array_interface(f)) print("------------------- Testing Numba dparray.as_ndarray") sys.stdout.flush() @@ -157,6 +171,14 @@ def f9(a): dp2 = f9(nd3) print("dp2:", dp2, type(dp2)) assert(isinstance(dp2, dparray.ndarray)) +assert(dparray.has_array_interface(dp2)) + +print("------------------- Testing Numba dparray.empty") +sys.stdout.flush() +dp3 = f10() +print("dp3:", dp3, type(dp3)) +assert(isinstance(dp3, dparray.ndarray)) +assert(dparray.has_array_interface(dp3)) #------------------------------- del a From ccafc8e7aa66948636152e2b25c828d15ac20ea4 Mon Sep 17 00:00:00 2001 From: "Todd A. Anderson" Date: Fri, 2 Oct 2020 20:44:05 -0500 Subject: [PATCH 22/23] Add support for attribute typing so things like array.T (transpose) now work. Numba can layer objects in its MemInfo dtor_info field. If present, that is a Python object that is the parent of the given MemInfo. From that, you can see if that is a ndarray and if so look at its MemInfo stored in base and see if that has an external allocator and if so it is also USM. 
--- numba/core/runtime/_nrt_python.c | 21 +++++++ numba/core/runtime/nrt.c | 11 +++- numba/core/runtime/nrt.h | 6 ++ numba/dppl/dparray.py | 79 ++++++++++++++++++++++----- numba/dppl/dppl_rt.c | 6 +- numba/dppl/tests/dppl/test_dparray.py | 27 +++++++++ 6 files changed, 131 insertions(+), 19 deletions(-) diff --git a/numba/core/runtime/_nrt_python.c b/numba/core/runtime/_nrt_python.c index e4e1bdaa0fd..ec6587b9368 100644 --- a/numba/core/runtime/_nrt_python.c +++ b/numba/core/runtime/_nrt_python.c @@ -54,6 +54,8 @@ int MemInfo_init(MemInfoObject *self, PyObject *args, PyObject *kwds) { return -1; } raw_ptr = PyLong_AsVoidPtr(raw_ptr_obj); + NRT_Debug(nrt_debug_print("MemInfo_init self=%p raw_ptr=%p\n", self, raw_ptr)); + if(PyErr_Occurred()) return -1; self->meminfo = (NRT_MemInfo *)raw_ptr; assert (NRT_MemInfo_refcount(self->meminfo) > 0 && "0 refcount"); @@ -116,6 +118,19 @@ MemInfo_get_external_allocator(MemInfoObject *self, void *closure) { return PyLong_FromVoidPtr(p); } +static +PyObject* +MemInfo_get_parent(MemInfoObject *self, void *closure) { + void *p = NRT_MemInfo_parent(self->meminfo); + if (p) { + Py_INCREF(p); + return (PyObject*)p; + } else { + Py_INCREF(Py_None); + return Py_None; + } +} + static void MemInfo_dealloc(MemInfoObject *self) { @@ -147,6 +162,9 @@ static PyGetSetDef MemInfo_getsets[] = { (getter)MemInfo_get_external_allocator, NULL, "Get the external allocator", NULL}, + {"parent", + (getter)MemInfo_get_parent, NULL, + NULL}, {NULL} /* Sentinel */ }; @@ -329,10 +347,13 @@ NRT_adapt_ndarray_to_python(arystruct_t* arystruct, PyTypeObject *retty, int ndi args = PyTuple_New(1); /* SETITEM steals reference */ PyTuple_SET_ITEM(args, 0, PyLong_FromVoidPtr(arystruct->meminfo)); + NRT_Debug(nrt_debug_print("NRT_adapt_ndarray_to_python arystruct->meminfo=%p\n", arystruct->meminfo)); /* Note: MemInfo_init() does not incref. This function steals the * NRT reference. 
*/ + NRT_Debug(nrt_debug_print("NRT_adapt_ndarray_to_python created MemInfo=%p\n", miobj)); if (MemInfo_init(miobj, args, NULL)) { + NRT_Debug(nrt_debug_print("MemInfo_init returned 0.\n")); return NULL; } Py_DECREF(args); diff --git a/numba/core/runtime/nrt.c b/numba/core/runtime/nrt.c index 33388c53106..fe63a691537 100644 --- a/numba/core/runtime/nrt.c +++ b/numba/core/runtime/nrt.c @@ -180,7 +180,7 @@ void NRT_MemInfo_init(NRT_MemInfo *mi,void *data, size_t size, mi->data = data; mi->size = size; mi->external_allocator = external_allocator; - NRT_Debug(nrt_debug_print("NRT_MemInfo_init %p\n", external_allocator)); + NRT_Debug(nrt_debug_print("NRT_MemInfo_init mi=%p external_allocator=%p\n", mi, external_allocator)); /* Update stats */ TheMSys.atomic_inc(&TheMSys.stats_mi_alloc); } @@ -189,6 +189,7 @@ NRT_MemInfo *NRT_MemInfo_new(void *data, size_t size, NRT_dtor_function dtor, void *dtor_info) { NRT_MemInfo *mi = NRT_Allocate(sizeof(NRT_MemInfo)); + NRT_Debug(nrt_debug_print("NRT_MemInfo_new mi=%p\n", mi)); NRT_MemInfo_init(mi, data, size, dtor, dtor_info, NULL); return mi; } @@ -369,6 +370,10 @@ void * NRT_MemInfo_external_allocator(NRT_MemInfo *mi) { return mi->external_allocator; } +void *NRT_MemInfo_parent(NRT_MemInfo *mi) { + return mi->dtor_info; +} + void NRT_MemInfo_dump(NRT_MemInfo *mi, FILE *out) { fprintf(out, "MemInfo %p refcount %zu\n", mi, mi->refct); } @@ -460,12 +465,12 @@ void* NRT_Allocate(size_t size) { void* NRT_Allocate_External(size_t size, NRT_ExternalAllocator *allocator) { void *ptr; if (allocator) { - NRT_Debug(nrt_debug_print("NRT_Allocate custom allocator %zu\n", size)); ptr = allocator->malloc(size, allocator->opaque_data); + NRT_Debug(nrt_debug_print("NRT_Allocate custom bytes=%zu ptr=%p\n", size, ptr)); } else { ptr = TheMSys.allocator.malloc(size); + NRT_Debug(nrt_debug_print("NRT_Allocate bytes=%zu ptr=%p\n", size, ptr)); } - NRT_Debug(nrt_debug_print("NRT_Allocate bytes=%zu ptr=%p\n", size, ptr)); 
TheMSys.atomic_inc(&TheMSys.stats_alloc); return ptr; } diff --git a/numba/core/runtime/nrt.h b/numba/core/runtime/nrt.h index 530c6786731..9fb23532964 100644 --- a/numba/core/runtime/nrt.h +++ b/numba/core/runtime/nrt.h @@ -190,6 +190,12 @@ size_t NRT_MemInfo_size(NRT_MemInfo* mi); VISIBILITY_HIDDEN void* NRT_MemInfo_external_allocator(NRT_MemInfo* mi); +/* + * Returns the parent MemInfo + */ +VISIBILITY_HIDDEN +void* NRT_MemInfo_parent(NRT_MemInfo* mi); + /* * NRT API for resizable buffers. diff --git a/numba/dppl/dparray.py b/numba/dppl/dparray.py index 55dc2c7374b..fc3952b9297 100644 --- a/numba/dppl/dparray.py +++ b/numba/dppl/dparray.py @@ -15,6 +15,7 @@ import builtins import sys from ctypes.util import find_library +from numba.core.typing.templates import builtin_registry as templates_registry from numba.core.typing.npydecl import registry as typing_registry from numba.core.imputils import builtin_registry as lower_registry import importlib @@ -113,7 +114,7 @@ def __new__(subtype, shape, return new_obj def __array_finalize__(self, obj): - dprint("__array_finalize__:", obj, type(obj)) + dprint("__array_finalize__:", obj, hex(id(obj)), type(obj)) # import pdb # pdb.set_trace() # When called from the explicit constructor, obj is None @@ -125,15 +126,21 @@ def __array_finalize__(self, obj): if hasattr(obj, array_interface_property): return if isinstance(obj, numba.core.runtime._nrt_python._MemInfo): - dprint("array_finalize got Numba MemInfo") - ea = obj.external_allocator - d = obj.data - dprint("external_allocator:", ea, hex(ea), type(ea)) - dprint("data:", d, hex(d), type(d)) - dppl_rt_allocator = numba.dppl._dppl_rt.get_external_allocator() - dprint("dppl external_allocator:", dppl_rt_allocator, hex(dppl_rt_allocator), type(dppl_rt_allocator)) - if ea == dppl_rt_allocator: - return + mobj = obj + while isinstance(mobj, numba.core.runtime._nrt_python._MemInfo): + dprint("array_finalize got Numba MemInfo") + ea = mobj.external_allocator + d = mobj.data + 
dprint("external_allocator:", hex(ea), type(ea)) + dprint("data:", hex(d), type(d)) + dppl_rt_allocator = numba.dppl._dppl_rt.get_external_allocator() + dprint("dppl external_allocator:", hex(dppl_rt_allocator), type(dppl_rt_allocator)) + dprint(dir(mobj)) + if ea == dppl_rt_allocator: + return + mobj = mobj.parent + if isinstance(mobj, ndarray): + mobj = mobj.base if isinstance(obj, np.ndarray): ob = self while isinstance(ob, np.ndarray): @@ -363,7 +370,6 @@ def box_array(typ, val, c): dtypeptr = c.env_manager.read_const(c.env_manager.add_const(np_dtype)) # Steals NRT ref newary = c.pyapi.nrt_adapt_ndarray_to_python(typ, val, dtypeptr) - print("box_array:", c, type(c), c.pyapi, type(c.pyapi)) return newary else: parent = nativeary.parent @@ -419,9 +425,13 @@ def copy_func_for_dparray(f, dparray_mod): g.__kwdefaults__ = f.__kwdefaults__ return g +def types_replace_array(x): + return tuple([z if z != types.Array else DPArrayType for z in x]) + def numba_register_lower_builtin(): todo = [] todo_builtin = [] + todo_getattr = [] # For all Numpy identifiers that have been registered for typing in Numba... # this registry contains functions, getattrs, setattrs, casts and constants...need to do them all? 
FIX FIX FIX @@ -446,6 +456,16 @@ def numba_register_lower_builtin(): todo.append(ig) # print("todo_builtin added:", func.__name__) + for lg in lower_registry.getattrs: + func, attr, types = lg + types_with_dparray = types_replace_array(types) + if DPArrayType in types_with_dparray: + dprint("lower_getattr:", func, type(func), attr, type(attr), types, type(types)) + todo_getattr.append((func, attr, types_with_dparray)) + + for lg in todo_getattr: + lower_registry.getattrs.append(lg) + cur_mod = importlib.import_module(__name__) for impl, func, types in (todo+todo_builtin): dparray_func = eval(func.__name__) @@ -464,6 +484,7 @@ def argspec_to_string(argspec): def numba_register_typing(): todo = [] todo_classes = [] + todo_getattr = [] # For all Numpy identifiers that have been registered for typing in Numba... for ig in typing_registry.globals: @@ -478,6 +499,10 @@ def numba_register_typing(): if isinstance(val, type): todo_classes.append(ig) + for tgetattr in templates_registry.attributes: + if tgetattr.key == types.Array: + todo_getattr.append(tgetattr) + # This is actuallya no-op now. # for val, typ in todo_classes: # print("todo_classes:", val, type(val), typ, type(typ)) @@ -556,8 +581,8 @@ def generic_impl(self): return exec_res new_dparray_template = type(class_name, (template,), { - "set_class_vars":set_key_original, - "generic":generic_impl}) + "set_class_vars" : set_key_original, + "generic" : generic_impl}) new_dparray_template.set_class_vars(dpval, template) @@ -565,6 +590,34 @@ def generic_impl(self): type_handler = types.Function(new_dparray_template) typing_registry.register_global(dpval, type_handler) + # Handle dparray attribute typing. 
+ for tgetattr in todo_getattr: + class_name = tgetattr.__name__ + "_dparray" + dprint("tgetattr:", tgetattr, type(tgetattr), class_name) + + @classmethod + def set_key(cls, key): + cls.key = key + + def getattr_impl(self, attr): + if attr.startswith('resolve_'): + #print("getattr_impl starts with resolve_:", self, type(self), attr) + def wrapper(*args, **kwargs): + attr_res = tgetattr.__getattribute__(self, attr)(*args, **kwargs) + if isinstance(attr_res, types.Array): + return DPArrayType(dtype=attr_res.dtype, ndim=attr_res.ndim, layout=attr_res.layout) + return wrapper + else: + return tgetattr.__getattribute__(self, attr) + + new_dparray_template = type(class_name, (tgetattr,), { + "set_class_vars" : set_key, + "__getattribute__" : getattr_impl}) + + new_dparray_template.set_class_vars(DPArrayType) + templates_registry.register_attr(new_dparray_template) + + def from_ndarray(x): return copy(x) diff --git a/numba/dppl/dppl_rt.c b/numba/dppl/dppl_rt.c index dde583a4c4c..75c05ff5856 100644 --- a/numba/dppl/dppl_rt.c +++ b/numba/dppl/dppl_rt.c @@ -6,8 +6,8 @@ NRT_ExternalAllocator dparray_allocator; -void dparray_memsys_init() { - void *(*get_queue)(); +void dparray_memsys_init(void) { + void *(*get_queue)(void); char *lib_name = "libDPPLSyclInterface.so"; char *malloc_name = "DPPLmalloc_shared"; char *free_name = "DPPLfree_with_queue"; @@ -35,7 +35,7 @@ void dparray_memsys_init() { // printf("dparray_memsys_init: %p %p %p\n", dparray_allocator.malloc, dparray_allocator.free, dparray_allocator.opaque_data); } -void * dparray_get_ext_allocator() { +void * dparray_get_ext_allocator(void) { printf("dparray_get_ext_allocator %p\n", &dparray_allocator); return (void*)&dparray_allocator; } diff --git a/numba/dppl/tests/dppl/test_dparray.py b/numba/dppl/tests/dppl/test_dparray.py index 1578d4c8612..5ed1ad3d339 100644 --- a/numba/dppl/tests/dppl/test_dparray.py +++ b/numba/dppl/tests/dppl/test_dparray.py @@ -50,6 +50,16 @@ def f9(a): def f10(): return 
dparray.empty((10,10)) +@numba.njit +def f11(x): + return x.shape + +@numba.njit +def f12(x): + return x.T + +#-------------------------------------------------------------------------------- + print("------------------- Testing Python Numpy") sys.stdout.flush() z1 = numpy.ones(10) @@ -159,6 +169,7 @@ def f10(): print("f:", f, type(f)) assert(isinstance(f, dparray.ndarray)) assert(dparray.has_array_interface(f)) +del f print("------------------- Testing Numba dparray.as_ndarray") sys.stdout.flush() @@ -172,6 +183,8 @@ def f10(): print("dp2:", dp2, type(dp2)) assert(isinstance(dp2, dparray.ndarray)) assert(dparray.has_array_interface(dp2)) +del nd3 +del dp2 print("------------------- Testing Numba dparray.empty") sys.stdout.flush() @@ -180,6 +193,20 @@ def f10(): assert(isinstance(dp3, dparray.ndarray)) assert(dparray.has_array_interface(dp3)) +print("------------------- Testing Numba dparray.shape") +sys.stdout.flush() +s1 = f11(dp3) +print("s1:", s1, type(s1)) + +print("------------------- Testing Numba dparray.T") +sys.stdout.flush() +dp4 = f12(dp3) +print("dp4:", dp4, type(dp4)) +assert(isinstance(dp4, dparray.ndarray)) +assert(dparray.has_array_interface(dp4)) +del dp3 +del dp4 + #------------------------------- del a From f954be95e9b920f4fa7e293e4214e53639f59b4a Mon Sep 17 00:00:00 2001 From: "Todd A. Anderson" Date: Wed, 4 Nov 2020 16:33:22 -0600 Subject: [PATCH 23/23] Don't redefine symbol if we specifically overloaded it prior. 
---
 numba/dppl/dparray.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/numba/dppl/dparray.py b/numba/dppl/dparray.py
index fc3952b9297..a04fe674d4b 100644
--- a/numba/dppl/dparray.py
+++ b/numba/dppl/dparray.py
@@ -232,8 +232,22 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
         else:
             return NotImplemented
 
+def isdef(x):
+    """Return True if evaluating the name ``x`` in this scope succeeds.
+
+    Uses ``eval``, so any resolvable name (including builtins) counts as
+    defined; only a NameError is treated as "not defined".
+    """
+    try:
+        eval(x)
+        return True
+    except NameError:
+        return False
+
 for c in class_list:
     cname = c[0]
+    if isdef(cname):
+        continue
     # For now we do the simple thing and copy the types from NumPy module into dparray module.
     new_func = "%s = np.%s" % (cname, cname)
     # new_func = "class %s(np.%s):\n" % (cname, cname)
@@ -261,6 +275,8 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
 # instead. This is a stop-gap. We should eventually find a
 # way to do the allocation correct to start with.
 for fname in functions_list:
+    if isdef(fname):
+        continue
     # print("Adding function", fname)
     new_func = "def %s(*args, **kwargs):\n" % fname
     new_func += " ret = np.%s(*args, **kwargs)\n" % fname