diff --git a/pydatastructs/linear_data_structures/_backend/cpp/algorithms/algorithms.cpp b/pydatastructs/linear_data_structures/_backend/cpp/algorithms/algorithms.cpp
index 2ce135b92..5829d07a2 100644
--- a/pydatastructs/linear_data_structures/_backend/cpp/algorithms/algorithms.cpp
+++ b/pydatastructs/linear_data_structures/_backend/cpp/algorithms/algorithms.cpp
@@ -10,6 +10,8 @@ static PyMethodDef algorithms_PyMethodDef[] = {
      METH_VARARGS | METH_KEYWORDS, ""},
     {"bubble_sort_llvm", (PyCFunction)bubble_sort_llvm,
      METH_VARARGS | METH_KEYWORDS, ""},
+    {"selection_sort_llvm", (PyCFunction)selection_sort_llvm,
+     METH_VARARGS | METH_KEYWORDS, ""},
     {"selection_sort", (PyCFunction) selection_sort,
      METH_VARARGS | METH_KEYWORDS, ""},
     {"insertion_sort", (PyCFunction) insertion_sort,
diff --git a/pydatastructs/linear_data_structures/_backend/cpp/algorithms/llvm_algorithms.py b/pydatastructs/linear_data_structures/_backend/cpp/algorithms/llvm_algorithms.py
index 24b14609d..56b548c3c 100644
--- a/pydatastructs/linear_data_structures/_backend/cpp/algorithms/llvm_algorithms.py
+++ b/pydatastructs/linear_data_structures/_backend/cpp/algorithms/llvm_algorithms.py
@@ -167,3 +167,136 @@ def _materialize(dtype: str) -> int:
 
     except Exception as e:
         raise RuntimeError(f"Failed to materialize function for dtype {dtype}: {e}")
+
+
+def get_selection_sort_ptr(dtype: str) -> int:
+    """Return the address of the JIT-compiled selection sort for ``dtype``.
+
+    The dtype name is lower-cased and stripped before the lookup.
+
+    Raises
+    ------
+    ValueError
+        If the normalized dtype is not a key of ``_SUPPORTED``.
+    """
+    dtype = dtype.lower().strip()
+    if dtype not in _SUPPORTED:
+        raise ValueError(f"Unsupported dtype '{dtype}'. Supported: {list(_SUPPORTED)}")
+
+    return _materialize_selection(dtype)
+
+
+def _build_selection_sort_ir(dtype: str) -> str:
+    """Build textual LLVM IR for ``void selection_sort_<dtype>(T* arr, i32 n)``.
+
+    The generated function sorts ``arr[0:n]`` in place in ascending order;
+    integer elements use a signed compare, all others an ordered float compare.
+    """
+    if dtype not in _SUPPORTED:
+        raise ValueError(f"Unsupported dtype '{dtype}'. Supported: {list(_SUPPORTED)}")
+
+    T, _ = _SUPPORTED[dtype]
+    i32 = ir.IntType(32)
+    i64 = ir.IntType(64)
+    one = ir.Constant(i32, 1)
+
+    mod = ir.Module(name=f"selection_sort_{dtype}_module")
+    fn_ty = ir.FunctionType(ir.VoidType(), [T.as_pointer(), i32])
+    fn = ir.Function(mod, fn_ty, name=f"selection_sort_{dtype}")
+
+    arr, n = fn.args
+    arr.name, n.name = "arr", "n"
+
+    b_entry = fn.append_basic_block("entry")
+    b_outer = fn.append_basic_block("outer")
+    b_inner = fn.append_basic_block("inner")
+    b_inner_latch = fn.append_basic_block("inner.latch")
+    b_swap = fn.append_basic_block("swap")
+    b_exit = fn.append_basic_block("exit")
+
+    # entry: arrays with fewer than two elements are already sorted.
+    b = ir.IRBuilder(b_entry)
+    b.cbranch(b.icmp_signed("<=", n, one), b_exit, b_outer)
+
+    # outer: for i in range(n - 1)
+    b.position_at_end(b_outer)
+    i_phi = b.phi(i32, name="i")
+    i_phi.add_incoming(ir.Constant(i32, 0), b_entry)
+    # i + 1 is computed here (not in "inner") so that it dominates every use:
+    # it seeds j in "inner" and is the next value of i after "swap".
+    i_next = b.add(i_phi, one, name="i.next")
+    b.cbranch(b.icmp_signed("<", i_next, n), b_inner, b_exit)
+
+    # inner: loop header scanning j over (i, n) while tracking the min index.
+    b.position_at_end(b_inner)
+    min_phi = b.phi(i32, name="min_idx")
+    min_phi.add_incoming(i_phi, b_outer)
+    j_phi = b.phi(i32, name="j")
+    j_phi.add_incoming(i_next, b_outer)
+    b.cbranch(b.icmp_signed("<", j_phi, n), b_inner_latch, b_swap)
+
+    # inner.latch: min_idx = j if arr[j] < arr[min_idx] else min_idx; j += 1
+    b.position_at_end(b_inner_latch)
+    arr_j_val = b.load(b.gep(arr, [b.sext(j_phi, i64)], inbounds=True))
+    arr_min_val = b.load(b.gep(arr, [b.sext(min_phi, i64)], inbounds=True))
+    if isinstance(T, ir.IntType):
+        is_less = b.icmp_signed("<", arr_j_val, arr_min_val)
+    else:
+        is_less = b.fcmp_ordered("<", arr_j_val, arr_min_val)
+    # A select keeps the update in SSA form without extra basic blocks, so
+    # this block stays the only back-edge predecessor of "inner".
+    min_next = b.select(is_less, j_phi, min_phi, name="min_idx.next")
+    j_next = b.add(j_phi, one, name="j.next")
+    min_phi.add_incoming(min_next, b_inner_latch)
+    j_phi.add_incoming(j_next, b_inner_latch)
+    b.branch(b_inner)
+
+    # swap: exchange arr[i] and arr[min_idx], then advance i.
+    b.position_at_end(b_swap)
+    ptr_i = b.gep(arr, [b.sext(i_phi, i64)], inbounds=True)
+    ptr_min = b.gep(arr, [b.sext(min_phi, i64)], inbounds=True)
+    val_i = b.load(ptr_i)
+    val_min = b.load(ptr_min)
+    b.store(val_min, ptr_i)
+    b.store(val_i, ptr_min)
+    i_phi.add_incoming(i_next, b_swap)
+    b.branch(b_outer)
+
+    b.position_at_end(b_exit)
+    b.ret_void()
+
+    return str(mod)
+
+
+def _materialize_selection(dtype: str) -> int:
+    """JIT-compile the selection sort for ``dtype`` and return its address.
+
+    The result is cached under the function name rather than the bare dtype,
+    so that entries any other kernel stores in the shared caches under the
+    dtype alone are neither returned by mistake nor overwritten.
+    """
+    _ensure_target_machine()
+
+    name = f"selection_sort_{dtype}"
+    if name in _fn_ptr_cache:
+        return _fn_ptr_cache[name]
+
+    try:
+        llvm_ir = _build_selection_sort_ir(dtype)
+        mod = binding.parse_assembly(llvm_ir)
+        mod.verify()
+
+        engine = binding.create_mcjit_compiler(mod, _target_machine)
+        engine.finalize_object()
+        engine.run_static_constructors()
+
+        addr = engine.get_function_address(name)
+        if not addr:
+            raise RuntimeError(f"Failed to get address for {name}")
+
+        _fn_ptr_cache[name] = addr
+        # Presumably kept so the engine, and with it the compiled code, stays alive.
+        _engines[name] = engine
+        return addr
+    except Exception as e:
+        raise RuntimeError(f"Failed to materialize function for dtype {dtype}: {e}") from e
diff --git a/pydatastructs/linear_data_structures/_backend/cpp/algorithms/quadratic_time_sort.hpp b/pydatastructs/linear_data_structures/_backend/cpp/algorithms/quadratic_time_sort.hpp
index 210382455..792c1a633 100644
--- a/pydatastructs/linear_data_structures/_backend/cpp/algorithms/quadratic_time_sort.hpp
+++ b/pydatastructs/linear_data_structures/_backend/cpp/algorithms/quadratic_time_sort.hpp
@@ -672,6 +672,357 @@ static PyObject* selection_sort(PyObject* self, PyObject* args, PyObject* kwds)
     return args0;
 }
 
+// Drops every reference held in `items` and empties the vector.
+static void selection_sort_llvm_release(std::vector<PyObject*>& items) {
+    for (PyObject* obj : items) {
+        Py_DECREF(obj);
+    }
+    items.clear();
+}
+
+// Calls llvm_algorithms.get_selection_sort_ptr(dtype_str) and returns its
+// result as a new reference, or NULL with a Python exception set.
+static PyObject* selection_sort_llvm_get_addr(const char* dtype_str) {
+    PyObject* sys = PyImport_ImportModule("sys");
+    if (!sys) {
+        return NULL;
+    }
+    PyObject* sys_path = PyObject_GetAttrString(sys, "path");
+    Py_DECREF(sys);
+    if (!sys_path) {
+        return NULL;
+    }
+
+    Py_ssize_t original_len = PyList_Size(sys_path);
+    if (original_len == -1) {
+        Py_DECREF(sys_path);
+        return NULL;
+    }
+
+    // NOTE(review): this entry is relative to the current working directory.
+    PyObject* path = PyUnicode_FromString("pydatastructs/linear_data_structures/_backend/cpp/algorithms");
+    if (!path) {
+        Py_DECREF(sys_path);
+        return NULL;
+    }
+
+    int append_result = PyList_Append(sys_path, path);
+    Py_DECREF(path);
+    if (append_result != 0) {
+        Py_DECREF(sys_path);
+        return NULL;
+    }
+
+    PyObject* mod = PyImport_ImportModule("llvm_algorithms");
+
+    // Remove the temporary sys.path entry. A failure here is ignored only when
+    // the import succeeded, so NULL is never returned without an exception set.
+    if (PyList_SetSlice(sys_path, original_len, original_len + 1, NULL) != 0 && mod) {
+        PyErr_Clear();
+    }
+    Py_DECREF(sys_path);
+    if (!mod) {
+        return NULL;
+    }
+
+    PyObject* fn = PyObject_GetAttrString(mod, "get_selection_sort_ptr");
+    Py_DECREF(mod);
+    if (!fn) {
+        return NULL;
+    }
+
+    PyObject* arg = PyUnicode_FromString(dtype_str);
+    if (!arg) {
+        Py_DECREF(fn);
+        return NULL;
+    }
+
+    PyObject* addr_obj = PyObject_CallFunctionObjArgs(fn, arg, NULL);
+    Py_DECREF(fn);
+    Py_DECREF(arg);
+    return addr_obj;
+}
+
+// Stores `value` at arr_obj[index]; the caller keeps its reference to `value`.
+// Returns 0 on success, -1 with a Python exception set.
+static int selection_sort_llvm_set_item(PyObject* arr_obj, size_t index, PyObject* value) {
+    PyObject* index_obj = PyLong_FromSize_t(index);
+    if (!index_obj) {
+        return -1;
+    }
+    int result = PyObject_SetItem(arr_obj, index_obj, value);
+    Py_DECREF(index_obj);
+    return result;
+}
+
+// Converts `values` into a buffer of T via `from_py`, sorts the buffer with the
+// JIT-compiled function at `addr`, then stores the sorted values at
+// arr_obj[lower...] followed by None in the rest of the N-element slice.
+// All references in `values` are released. Returns 0 on success, -1 with a
+// Python exception set.
+template <typename T, typename FromPy, typename ToPy>
+static int selection_sort_llvm_run(PyObject* arr_obj, std::vector<PyObject*>& values,
+                                   size_t lower, size_t N, long long addr,
+                                   FromPy from_py, ToPy to_py) {
+    std::vector<T> buf;
+    buf.reserve(values.size());
+    for (size_t i = 0; i < values.size(); i++) {
+        T converted = T();
+        if (!from_py(values[i], i, converted)) {
+            selection_sort_llvm_release(values);
+            return -1;
+        }
+        buf.push_back(converted);
+    }
+    selection_sort_llvm_release(values);
+
+    typedef void (*sort_fn_t)(T*, int32_t);
+    sort_fn_t fn = reinterpret_cast<sort_fn_t>(static_cast<uintptr_t>(addr));
+    if (!fn) {
+        PyErr_SetString(PyExc_RuntimeError, "Invalid function pointer");
+        return -1;
+    }
+    fn(buf.data(), (int32_t)buf.size());
+
+    for (size_t i = 0; i < buf.size(); i++) {
+        PyObject* new_value = to_py(buf[i]);
+        if (!new_value) {
+            return -1;
+        }
+        int result = selection_sort_llvm_set_item(arr_obj, lower + i, new_value);
+        Py_DECREF(new_value);
+        if (result != 0) {
+            return -1;
+        }
+    }
+
+    // PyObject_SetItem does not steal a reference, so Py_None is passed as-is
+    // (an extra Py_INCREF here would leak one reference per slot).
+    for (size_t i = buf.size(); i < N; i++) {
+        if (selection_sort_llvm_set_item(arr_obj, lower + i, Py_None) != 0) {
+            return -1;
+        }
+    }
+    return 0;
+}
+
+// Sorts arr[start..end] in place using the JIT-compiled selection sort.
+// Arguments (all but `arr` optional): arr, start, end, comp, dtype.
+//   - A `comp` other than None raises NotImplementedError.
+//   - `dtype` must be int32, int64, float32 or float64; when omitted it is
+//     inferred from the first non-None element (int -> int64, else float64).
+//   - None entries of the slice end up after the sorted values.
+// Returns a new reference to `arr`, or NULL with a Python exception set.
+static PyObject* selection_sort_llvm(PyObject* self, PyObject* args, PyObject* kwds) {
+    static const char* kwlist[] = {"arr", "start", "end", "comp", "dtype", NULL};
+    PyObject* arr_obj = NULL;
+    PyObject* start_obj = NULL;
+    PyObject* end_obj = NULL;
+    PyObject* comp_obj = NULL;
+    const char* dtype_cstr = NULL;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OOOs", (char**)kwlist,
+                                     &arr_obj, &start_obj, &end_obj, &comp_obj, &dtype_cstr)) {
+        return NULL;
+    }
+
+    Py_ssize_t arr_len_ssize = PyObject_Length(arr_obj);
+    if (arr_len_ssize < 0) {
+        return NULL;
+    }
+    size_t arr_len = (size_t)arr_len_ssize;
+
+    // Objects exposing both attributes are treated as dynamic arrays; their
+    // `_size` attribute, when a non-negative int, overrides the length.
+    bool is_dynamic_array = PyObject_HasAttrString(arr_obj, "_last_pos_filled") &&
+                            PyObject_HasAttrString(arr_obj, "_num");
+    if (is_dynamic_array && PyObject_HasAttrString(arr_obj, "_size")) {
+        PyObject* size_obj = PyObject_GetAttrString(arr_obj, "_size");
+        if (size_obj && PyLong_Check(size_obj)) {
+            Py_ssize_t size_val = PyLong_AsSsize_t(size_obj);
+            if (size_val >= 0) {
+                arr_len = (size_t)size_val;
+            }
+        }
+        Py_XDECREF(size_obj);
+        // The probe is best-effort; do not let it leave an exception pending.
+        PyErr_Clear();
+    }
+
+    if (arr_len == 0) {
+        Py_INCREF(arr_obj);
+        return arr_obj;
+    }
+
+    size_t lower = 0;
+    size_t upper = arr_len - 1;
+
+    if (start_obj && start_obj != Py_None) {
+        Py_ssize_t start_val = PyLong_AsSsize_t(start_obj);
+        if (start_val == -1 && PyErr_Occurred()) {
+            return NULL;
+        }
+        lower = (size_t)start_val;
+    }
+
+    if (end_obj && end_obj != Py_None) {
+        Py_ssize_t end_val = PyLong_AsSsize_t(end_obj);
+        if (end_val == -1 && PyErr_Occurred()) {
+            return NULL;
+        }
+        upper = (size_t)end_val;
+    }
+
+    // Negative bounds wrap to huge size_t values: such a `start` gives an
+    // empty range and such an `end` is clamped to the last index.
+    if (upper < lower || lower >= arr_len) {
+        Py_INCREF(arr_obj);
+        return arr_obj;
+    }
+    if (upper >= arr_len) {
+        upper = arr_len - 1;
+    }
+
+    if (comp_obj && comp_obj != Py_None) {
+        PyErr_SetString(PyExc_NotImplementedError, "LLVM backend does not support custom 'comp'.");
+        return NULL;
+    }
+
+    const size_t N = upper - lower + 1;
+    if (N > INT32_MAX) {
+        PyErr_SetString(PyExc_OverflowError, "Slice length exceeds 32-bit limit for JIT function signature.");
+        return NULL;
+    }
+
+    // Collect the non-None items of the slice as owned references.
+    std::vector<PyObject*> non_none_values;
+    non_none_values.reserve(N);
+    for (size_t i = 0; i < N; i++) {
+        size_t actual_index = lower + i;
+        PyObject* item = NULL;
+        if (PySequence_Check(arr_obj)) {
+            item = PySequence_GetItem(arr_obj, (Py_ssize_t)actual_index);
+        } else {
+            PyObject* index_obj = PyLong_FromSize_t(actual_index);
+            if (index_obj) {
+                item = PyObject_GetItem(arr_obj, index_obj);
+                Py_DECREF(index_obj);
+            }
+        }
+
+        if (!item) {
+            selection_sort_llvm_release(non_none_values);
+            if (PyErr_ExceptionMatches(PyExc_IndexError)) {
+                PyErr_Format(PyExc_IndexError, "Cannot access index %zu in array", actual_index);
+            }
+            return NULL;
+        }
+
+        if (item == Py_None) {
+            Py_DECREF(item);
+        } else {
+            non_none_values.push_back(item);
+        }
+    }
+
+    if (non_none_values.empty()) {
+        Py_INCREF(arr_obj);
+        return arr_obj;
+    }
+
+    std::string dtype = (dtype_cstr && *dtype_cstr) ? std::string(dtype_cstr) : std::string();
+    if (dtype.empty()) {
+        dtype = PyLong_Check(non_none_values[0]) ? "int64" : "float64";
+    }
+    if (dtype != "int32" && dtype != "int64" && dtype != "float32" && dtype != "float64") {
+        selection_sort_llvm_release(non_none_values);
+        PyErr_SetString(PyExc_ValueError, "dtype must be one of: int32,int64,float32,float64");
+        return NULL;
+    }
+
+    PyObject* addr_obj = selection_sort_llvm_get_addr(dtype.c_str());
+    if (!addr_obj) {
+        selection_sort_llvm_release(non_none_values);
+        return NULL;
+    }
+    long long addr = PyLong_AsLongLong(addr_obj);
+    Py_DECREF(addr_obj);
+    if (addr == -1 && PyErr_Occurred()) {
+        selection_sort_llvm_release(non_none_values);
+        return NULL;
+    }
+
+    int status = 0;
+    if (dtype == "int32") {
+        status = selection_sort_llvm_run<int32_t>(
+            arr_obj, non_none_values, lower, N, addr,
+            [](PyObject* obj, size_t i, int32_t& out) -> bool {
+                long v = PyLong_AsLong(obj);
+                if (v == -1 && PyErr_Occurred()) {
+                    return false;
+                }
+                if (v < INT32_MIN || v > INT32_MAX) {
+                    PyErr_Format(PyExc_OverflowError, "Value %ld at index %zu out of int32 range", v, i);
+                    return false;
+                }
+                out = (int32_t)v;
+                return true;
+            },
+            [](int32_t v) -> PyObject* { return PyLong_FromLong((long)v); });
+    } else if (dtype == "int64") {
+        status = selection_sort_llvm_run<int64_t>(
+            arr_obj, non_none_values, lower, N, addr,
+            [](PyObject* obj, size_t, int64_t& out) -> bool {
+                long long v = PyLong_AsLongLong(obj);
+                if (v == -1 && PyErr_Occurred()) {
+                    return false;
+                }
+                out = (int64_t)v;
+                return true;
+            },
+            [](int64_t v) -> PyObject* { return PyLong_FromLongLong((long long)v); });
+    } else if (dtype == "float32") {
+        status = selection_sort_llvm_run<float>(
+            arr_obj, non_none_values, lower, N, addr,
+            [](PyObject* obj, size_t, float& out) -> bool {
+                double v = PyFloat_AsDouble(obj);
+                if (v == -1.0 && PyErr_Occurred()) {
+                    return false;
+                }
+                out = (float)v;
+                return true;
+            },
+            [](float v) -> PyObject* { return PyFloat_FromDouble((double)v); });
+    } else {
+        status = selection_sort_llvm_run<double>(
+            arr_obj, non_none_values, lower, N, addr,
+            [](PyObject* obj, size_t, double& out) -> bool {
+                double v = PyFloat_AsDouble(obj);
+                if (v == -1.0 && PyErr_Occurred()) {
+                    return false;
+                }
+                out = v;
+                return true;
+            },
+            [](double v) -> PyObject* { return PyFloat_FromDouble(v); });
+    }
+    if (status != 0) {
+        return NULL;
+    }
+
+    if (is_dynamic_array) {
+        // Best-effort call: a failing _modify is deliberately ignored.
+        PyObject* modify_result = PyObject_CallMethod(arr_obj, "_modify", "O", Py_True);
+        if (!modify_result) {
+            PyErr_Clear();
+        } else {
+            Py_DECREF(modify_result);
+        }
+    }
+
+    Py_INCREF(arr_obj);
+    return arr_obj;
+}
 
 // Insertion Sort
 static PyObject* insertion_sort_impl(PyObject* array, size_t lower, size_t upper,
diff --git a/pydatastructs/linear_data_structures/algorithms.py b/pydatastructs/linear_data_structures/algorithms.py
index 4010a3c28..eebb93f9c 100644
--- a/pydatastructs/linear_data_structures/algorithms.py
+++ b/pydatastructs/linear_data_structures/algorithms.py
@@ -1440,6 +1440,8 @@ def selection_sort(array, **kwargs):
     backend = kwargs.pop("backend", Backend.PYTHON)
     if backend == Backend.CPP:
         return _algorithms.selection_sort(array, **kwargs)
+    if backend == Backend.LLVM:
+        return _algorithms.selection_sort_llvm(array, **kwargs)
     start = kwargs.get('start', 0)
     end = kwargs.get('end', len(array) - 1)
     comp = kwargs.get('comp', lambda u, v: u <= v)
diff --git a/pydatastructs/linear_data_structures/tests/test_algorithms.py b/pydatastructs/linear_data_structures/tests/test_algorithms.py
index 3e287bb74..8d3f80933 100644
--- a/pydatastructs/linear_data_structures/tests/test_algorithms.py
+++ b/pydatastructs/linear_data_structures/tests/test_algorithms.py
@@ -126,6 +126,7 @@ def test_bubble_sort():
 def test_selection_sort():
     _test_common_sort(selection_sort)
     _test_common_sort(selection_sort, backend=Backend.CPP)
+    _test_common_sort(selection_sort, backend=Backend.LLVM)
 
 def test_insertion_sort():
     _test_common_sort(insertion_sort)