From a28aad89a6de16b4d9a155a8855131aee0d17aae Mon Sep 17 00:00:00 2001 From: Prerak Singh Date: Thu, 4 Sep 2025 02:41:21 +0530 Subject: [PATCH 01/19] added llvm optimizations for bubble sort --- .../cpp/algorithms/llvm_algorithms.py | 33 +++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/pydatastructs/linear_data_structures/_backend/cpp/algorithms/llvm_algorithms.py b/pydatastructs/linear_data_structures/_backend/cpp/algorithms/llvm_algorithms.py index c2aa2beb5..20c4c4ce6 100644 --- a/pydatastructs/linear_data_structures/_backend/cpp/algorithms/llvm_algorithms.py +++ b/pydatastructs/linear_data_structures/_backend/cpp/algorithms/llvm_algorithms.py @@ -32,7 +32,10 @@ def _ensure_target_machine(): binding.initialize_native_asmprinter() target = binding.Target.from_default_triple() - _target_machine = target.create_target_machine() + _target_machine = target.create_target_machine( + opt=3, + features="+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+avx,+avx2" + ) except Exception as e: raise RuntimeError(f"Failed to initialize LLVM target machine: {e}") @@ -110,7 +113,7 @@ def _build_bubble_sort_ir(dtype: str) -> str: if isinstance(T, ir.IntType): should_swap = b.icmp_signed(">", val_j, val_jp1) else: - should_swap = b.fcmp_ordered(">", val_j, val_jp1) + should_swap = b.fcmp_ordered(">", val_j, val_jp1, fastmath=True) b.cbranch(should_swap, b_swap, b_inner_latch) @@ -145,6 +148,32 @@ def _materialize(dtype: str) -> int: mod = binding.parse_assembly(llvm_ir) mod.verify() + pmb = binding.PassManagerBuilder() + pmb.opt_level = 3 + pmb.loop_vectorize = True + pmb.slp_vectorize = True + + fpm = binding.create_function_pass_manager(mod) + pm = binding.create_module_pass_manager() + + pm.add_basic_alias_analysis_pass() + pm.add_type_based_alias_analysis_pass() + pm.add_instruction_combining_pass() + pm.add_gvn_pass() + pm.add_cfg_simplification_pass() + pm.add_loop_unroll_pass() + pm.add_loop_unswitch_pass() + + pmb.populate(fpm) + pmb.populate(pm) + 
+ fpm.initialize() + for func in mod.functions: + fpm.run(func) + fpm.finalize() + + pm.run(mod) + engine = binding.create_mcjit_compiler(mod, _target_machine) engine.finalize_object() engine.run_static_constructors() From 49174dc4a1aabdaed365c8ab2995b37003e77e02 Mon Sep 17 00:00:00 2001 From: Prerak Singh Date: Tue, 23 Sep 2025 20:15:25 +0530 Subject: [PATCH 02/19] added llvm backend for adjacency list graphs --- .../graphs/_backend/cpp/AdjacencyList.hpp | 3 - .../graphs/_backend/cpp/AdjacencyListLLVM.hpp | 360 +++++ pydatastructs/graphs/_backend/cpp/graph.cpp | 19 +- .../_backend/cpp/llvm_adjacency_list.py | 1406 +++++++++++++++++ pydatastructs/graphs/_extensions.py | 19 +- pydatastructs/graphs/adjacency_list.py | 30 +- .../graphs/tests/test_adjacency_list.py | 12 + .../_backend/cpp/_algorithms.py | 0 .../_backend/cpp/_arrays.py | 0 .../cpp/algorithms/llvm_algorithms.py | 33 +- pydatastructs/utils/_extensions.py | 6 +- 11 files changed, 1846 insertions(+), 42 deletions(-) create mode 100644 pydatastructs/graphs/_backend/cpp/AdjacencyListLLVM.hpp create mode 100644 pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py create mode 100644 pydatastructs/linear_data_structures/_backend/cpp/_algorithms.py create mode 100644 pydatastructs/linear_data_structures/_backend/cpp/_arrays.py diff --git a/pydatastructs/graphs/_backend/cpp/AdjacencyList.hpp b/pydatastructs/graphs/_backend/cpp/AdjacencyList.hpp index ffbb6f476..6b9879c13 100644 --- a/pydatastructs/graphs/_backend/cpp/AdjacencyList.hpp +++ b/pydatastructs/graphs/_backend/cpp/AdjacencyList.hpp @@ -340,9 +340,6 @@ static PyObject* AdjacencyListGraph_add_edge(AdjacencyListGraph* self, PyObject* Py_RETURN_NONE; } - - - static PyMethodDef AdjacencyListGraph_methods[] = { {"add_vertex", (PyCFunction)AdjacencyListGraph_add_vertex, METH_VARARGS, "Add a vertex to the graph"}, {"add_edge", (PyCFunction)AdjacencyListGraph_add_edge, METH_VARARGS, "Add an edge to the graph"}, diff --git 
a/pydatastructs/graphs/_backend/cpp/AdjacencyListLLVM.hpp b/pydatastructs/graphs/_backend/cpp/AdjacencyListLLVM.hpp new file mode 100644 index 000000000..4a5f85dc5 --- /dev/null +++ b/pydatastructs/graphs/_backend/cpp/AdjacencyListLLVM.hpp @@ -0,0 +1,360 @@ +#include +#include +#include +#include +#include +#include + +extern PyTypeObject AdjacencyListGraphLLVMType; + +typedef void* (*GraphInitFunc)(); +typedef int (*AddVertexFunc)(void*, const char*, int); +typedef int (*AddEdgeFunc)(void*, const char*, int, const char*, int, double); +typedef int (*IsAdjacentFunc)(void*, const char*, int, const char*, int); +typedef int (*RemoveVertexFunc)(void*, const char*, int); +typedef int (*RemoveEdgeFunc)(void*, const char*, int, const char*, int); +typedef void (*GraphCleanupFunc)(void*); + +static GraphInitFunc llvm_graph_init = nullptr; +static AddVertexFunc llvm_add_vertex = nullptr; +static AddEdgeFunc llvm_add_edge = nullptr; +static IsAdjacentFunc llvm_is_adjacent = nullptr; +static RemoveVertexFunc llvm_remove_vertex = nullptr; +static RemoveEdgeFunc llvm_remove_edge = nullptr; +static GraphCleanupFunc llvm_graph_cleanup = nullptr; + +static void* llvm_execution_engine = nullptr; +static bool llvm_backend_initialized = false; + +typedef struct { + PyObject_HEAD + void* llvm_graph_ptr; + bool is_valid; +} AdjacencyListGraphLLVM; + +static int safe_strlen(const char* str) { + return str ? 
static_cast(strlen(str)) : 0; +} + +static PyObject* initialize_llvm_backend(PyObject* self, PyObject* args) { + PyObject* func_dict; + PyObject* ee_obj; + + if (!PyArg_ParseTuple(args, "OO", &func_dict, &ee_obj)) { + return nullptr; + } + + if (!PyDict_Check(func_dict)) { + PyErr_SetString(PyExc_TypeError, "First argument must be a dictionary"); + return nullptr; + } + + llvm_execution_engine = PyLong_AsVoidPtr(ee_obj); + if (PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, "Invalid execution engine object"); + return nullptr; + } + + PyObject* init_ptr = PyDict_GetItemString(func_dict, "graph_init"); + PyObject* add_vertex_ptr = PyDict_GetItemString(func_dict, "add_vertex"); + PyObject* add_edge_ptr = PyDict_GetItemString(func_dict, "add_edge"); + PyObject* is_adjacent_ptr = PyDict_GetItemString(func_dict, "is_adjacent"); + PyObject* remove_vertex_ptr = PyDict_GetItemString(func_dict, "remove_vertex"); + PyObject* remove_edge_ptr = PyDict_GetItemString(func_dict, "remove_edge"); + PyObject* cleanup_ptr = PyDict_GetItemString(func_dict, "graph_cleanup"); + + if (!init_ptr || !add_vertex_ptr || !add_edge_ptr || !is_adjacent_ptr || + !remove_vertex_ptr || !remove_edge_ptr || !cleanup_ptr) { + PyErr_SetString(PyExc_ValueError, "Missing required function pointers in dictionary"); + return nullptr; + } + + llvm_graph_init = (GraphInitFunc)PyLong_AsVoidPtr(init_ptr); + llvm_add_vertex = (AddVertexFunc)PyLong_AsVoidPtr(add_vertex_ptr); + llvm_add_edge = (AddEdgeFunc)PyLong_AsVoidPtr(add_edge_ptr); + llvm_is_adjacent = (IsAdjacentFunc)PyLong_AsVoidPtr(is_adjacent_ptr); + llvm_remove_vertex = (RemoveVertexFunc)PyLong_AsVoidPtr(remove_vertex_ptr); + llvm_remove_edge = (RemoveEdgeFunc)PyLong_AsVoidPtr(remove_edge_ptr); + llvm_graph_cleanup = (GraphCleanupFunc)PyLong_AsVoidPtr(cleanup_ptr); + + if (PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, "Failed to convert function pointers"); + return nullptr; + } + + if (!llvm_graph_init || !llvm_add_vertex || 
!llvm_add_edge || + !llvm_is_adjacent || !llvm_remove_vertex || !llvm_remove_edge || + !llvm_graph_cleanup) { + PyErr_SetString(PyExc_ValueError, "One or more function pointers are null"); + return nullptr; + } + + llvm_backend_initialized = true; + + Py_RETURN_NONE; +} + +static bool check_llvm_backend() { + return llvm_backend_initialized && llvm_graph_init && llvm_add_vertex && + llvm_add_edge && llvm_is_adjacent && llvm_remove_vertex && + llvm_remove_edge && llvm_graph_cleanup; +} + +static void AdjacencyListGraphLLVM_dealloc(AdjacencyListGraphLLVM* self) { + if (self->is_valid && self->llvm_graph_ptr && llvm_graph_cleanup) { + llvm_graph_cleanup(self->llvm_graph_ptr); + self->llvm_graph_ptr = nullptr; + self->is_valid = false; + } + Py_TYPE(self)->tp_free((PyObject*)self); +} + +static PyObject* AdjacencyListGraphLLVM_new(PyTypeObject* type, PyObject* args, PyObject* kwds) { + AdjacencyListGraphLLVM* self = (AdjacencyListGraphLLVM*)type->tp_alloc(type, 0); + if (!self) { + return nullptr; + } + + self->llvm_graph_ptr = nullptr; + self->is_valid = false; + + return (PyObject*)self; +} + +static int AdjacencyListGraphLLVM_init(AdjacencyListGraphLLVM* self, PyObject* args, PyObject* kwds) { + if (!check_llvm_backend()) { + PyErr_SetString(PyExc_RuntimeError, + "LLVM backend not initialized. 
Call initialize_llvm_backend() first."); + return -1; + } + + self->llvm_graph_ptr = llvm_graph_init(); + if (!self->llvm_graph_ptr) { + PyErr_SetString(PyExc_RuntimeError, "Failed to initialize LLVM graph"); + return -1; + } + + self->is_valid = true; + return 0; +} + +static PyObject* AdjacencyListGraphLLVM_add_vertex(AdjacencyListGraphLLVM* self, PyObject* args) { + const char* name; + + if (!PyArg_ParseTuple(args, "s", &name)) { + return nullptr; + } + + if (!self->is_valid || !self->llvm_graph_ptr) { + PyErr_SetString(PyExc_RuntimeError, "Invalid graph object"); + return nullptr; + } + + if (!check_llvm_backend()) { + PyErr_SetString(PyExc_RuntimeError, "LLVM backend not properly initialized"); + return nullptr; + } + + int name_len = safe_strlen(name); + int result = llvm_add_vertex(self->llvm_graph_ptr, name, name_len); + + if (result != 0) { + if (result == -1) { + PyErr_SetString(PyExc_ValueError, "Vertex with this name already exists"); + } else { + PyErr_Format(PyExc_RuntimeError, "Failed to add vertex (error code: %d)", result); + } + return nullptr; + } + + Py_RETURN_NONE; +} + +static PyObject* AdjacencyListGraphLLVM_add_edge(AdjacencyListGraphLLVM* self, PyObject* args) { + const char* source; + const char* target; + double weight = 1.0; + + if (!PyArg_ParseTuple(args, "ss|d", &source, &target, &weight)) { + return nullptr; + } + + if (!self->is_valid || !self->llvm_graph_ptr) { + PyErr_SetString(PyExc_RuntimeError, "Invalid graph object"); + return nullptr; + } + + if (!check_llvm_backend()) { + PyErr_SetString(PyExc_RuntimeError, "LLVM backend not properly initialized"); + return nullptr; + } + + int src_len = safe_strlen(source); + int tgt_len = safe_strlen(target); + int result = llvm_add_edge(self->llvm_graph_ptr, source, src_len, target, tgt_len, weight); + + if (result != 0) { + if (result == -1) { + PyErr_SetString(PyExc_ValueError, "Source vertex not found"); + } else if (result == -2) { + PyErr_SetString(PyExc_ValueError, "Target vertex not 
found"); + } else { + PyErr_Format(PyExc_RuntimeError, "Failed to add edge (error code: %d)", result); + } + return nullptr; + } + + Py_RETURN_NONE; +} + +static PyObject* AdjacencyListGraphLLVM_is_adjacent(AdjacencyListGraphLLVM* self, PyObject* args) { + const char* node1; + const char* node2; + + if (!PyArg_ParseTuple(args, "ss", &node1, &node2)) { + return nullptr; + } + + if (!self->is_valid || !self->llvm_graph_ptr) { + PyErr_SetString(PyExc_RuntimeError, "Invalid graph object"); + return nullptr; + } + + if (!check_llvm_backend()) { + PyErr_SetString(PyExc_RuntimeError, "LLVM backend not properly initialized"); + return nullptr; + } + + int node1_len = safe_strlen(node1); + int node2_len = safe_strlen(node2); + int result = llvm_is_adjacent(self->llvm_graph_ptr, node1, node1_len, node2, node2_len); + + if (result == 1) { + Py_RETURN_TRUE; + } else { + Py_RETURN_FALSE; + } +} + +static PyObject* AdjacencyListGraphLLVM_remove_vertex(AdjacencyListGraphLLVM* self, PyObject* args) { + const char* name; + + if (!PyArg_ParseTuple(args, "s", &name)) { + return nullptr; + } + + if (!self->is_valid || !self->llvm_graph_ptr) { + PyErr_SetString(PyExc_RuntimeError, "Invalid graph object"); + return nullptr; + } + + if (!check_llvm_backend()) { + PyErr_SetString(PyExc_RuntimeError, "LLVM backend not properly initialized"); + return nullptr; + } + + int name_len = safe_strlen(name); + int result = llvm_remove_vertex(self->llvm_graph_ptr, name, name_len); + + if (result != 0) { + if (result == -1) { + PyErr_SetString(PyExc_ValueError, "Vertex not found"); + } else { + PyErr_Format(PyExc_RuntimeError, "Failed to remove vertex (error code: %d)", result); + } + return nullptr; + } + + Py_RETURN_NONE; +} + +static PyObject* AdjacencyListGraphLLVM_remove_edge(AdjacencyListGraphLLVM* self, PyObject* args) { + const char* source; + const char* target; + + if (!PyArg_ParseTuple(args, "ss", &source, &target)) { + return nullptr; + } + + if (!self->is_valid || !self->llvm_graph_ptr) 
{ + PyErr_SetString(PyExc_RuntimeError, "Invalid graph object"); + return nullptr; + } + + if (!check_llvm_backend()) { + PyErr_SetString(PyExc_RuntimeError, "LLVM backend not properly initialized"); + return nullptr; + } + + int src_len = safe_strlen(source); + int tgt_len = safe_strlen(target); + int result = llvm_remove_edge(self->llvm_graph_ptr, source, src_len, target, tgt_len); + + if (result != 0) { + if (result == -1) { + PyErr_SetString(PyExc_ValueError, "Source vertex not found"); + } else if (result == -2) { + PyErr_SetString(PyExc_ValueError, "Target vertex not found"); + } else { + PyErr_Format(PyExc_RuntimeError, "Failed to remove edge (error code: %d)", result); + } + return nullptr; + } + + Py_RETURN_NONE; +} + +static PyMethodDef AdjacencyListGraphLLVM_methods[] = { + {"add_vertex", (PyCFunction)AdjacencyListGraphLLVM_add_vertex, METH_VARARGS, + "Add a vertex to the graph"}, + {"add_edge", (PyCFunction)AdjacencyListGraphLLVM_add_edge, METH_VARARGS, + "Add an edge to the graph"}, + {"is_adjacent", (PyCFunction)AdjacencyListGraphLLVM_is_adjacent, METH_VARARGS, + "Check if two vertices are adjacent"}, + {"remove_vertex", (PyCFunction)AdjacencyListGraphLLVM_remove_vertex, METH_VARARGS, + "Remove a vertex from the graph"}, + {"remove_edge", (PyCFunction)AdjacencyListGraphLLVM_remove_edge, METH_VARARGS, + "Remove an edge from the graph"}, + {nullptr} +}; + +PyTypeObject AdjacencyListGraphLLVMType = { + PyVarObject_HEAD_INIT(nullptr, 0) + "llvm_graph.AdjacencyListGraphLLVM", // tp_name + sizeof(AdjacencyListGraphLLVM), // tp_basicsize + 0, // tp_itemsize + (destructor)AdjacencyListGraphLLVM_dealloc, // tp_dealloc + 0, // tp_vectorcall_offset + 0, // tp_getattr + 0, // tp_setattr + 0, // tp_as_async + 0, // tp_repr + 0, // tp_as_number + 0, // tp_as_sequence + 0, // tp_as_mapping + 0, // tp_hash + 0, // tp_call + 0, // tp_str + 0, // tp_getattro + 0, // tp_setattro + 0, // tp_as_buffer + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, // tp_flags + "LLVM-backed 
adjacency list graph", // tp_doc + 0, // tp_traverse + 0, // tp_clear + 0, // tp_richcompare + 0, // tp_weaklistoffset + 0, // tp_iter + 0, // tp_iternext + AdjacencyListGraphLLVM_methods, // tp_methods + 0, // tp_members + 0, // tp_getset + 0, // tp_base + 0, // tp_dict + 0, // tp_descr_get + 0, // tp_descr_set + 0, // tp_dictoffset + (initproc)AdjacencyListGraphLLVM_init, // tp_init + 0, // tp_alloc + AdjacencyListGraphLLVM_new, // tp_new +}; diff --git a/pydatastructs/graphs/_backend/cpp/graph.cpp b/pydatastructs/graphs/_backend/cpp/graph.cpp index d41ca60f4..60b688b11 100644 --- a/pydatastructs/graphs/_backend/cpp/graph.cpp +++ b/pydatastructs/graphs/_backend/cpp/graph.cpp @@ -4,6 +4,7 @@ #include "AdjacencyMatrix.hpp" #include "AdjacencyListGraphNode.hpp" #include "AdjacencyMatrixGraphNode.hpp" +#include "AdjacencyListLLVM.hpp" #include "graph_bindings.hpp" #ifdef __cplusplus @@ -16,12 +17,18 @@ PyMODINIT_FUNC PyInit__graph(void); } #endif +static PyMethodDef module_methods[] = { + {"initialize_llvm_backend", initialize_llvm_backend, METH_VARARGS, + "Initialize LLVM backend with compiled function pointers"}, + {nullptr, nullptr, 0, nullptr} +}; + static struct PyModuleDef graph_module = { PyModuleDef_HEAD_INIT, "_graph", "C++ module for graphs", -1, - NULL, + module_methods, }; PyMODINIT_FUNC PyInit__graph(void) { @@ -39,6 +46,9 @@ PyMODINIT_FUNC PyInit__graph(void) { if (PyType_Ready(&AdjacencyMatrixGraphNodeType) < 0) return NULL; + if (PyType_Ready(&AdjacencyListGraphLLVMType) < 0) { + return nullptr; + } m = PyModule_Create(&graph_module); if (m == NULL) return NULL; @@ -64,5 +74,12 @@ PyMODINIT_FUNC PyInit__graph(void) { return NULL; } + Py_INCREF(&AdjacencyListGraphLLVMType); + if (PyModule_AddObject(m, "AdjacencyListGraphLLVM", (PyObject*)&AdjacencyListGraphLLVMType) < 0) { + Py_DECREF(&AdjacencyListGraphLLVMType); + Py_DECREF(m); + return nullptr; + } + return m; } diff --git a/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py 
b/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py new file mode 100644 index 000000000..ced108cfe --- /dev/null +++ b/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py @@ -0,0 +1,1406 @@ +import llvmlite.binding as llvm +import llvmlite.ir as ir +from llvmlite import ir +import ctypes +from ctypes import Structure, POINTER, c_void_p, c_int, c_char_p, c_double + +llvm.initialize_native_target() +llvm.initialize_native_asmprinter() + +class LLVMAdjacencyListGraph: + def __init__(self): + self.module = ir.Module(name="adjacency_list_graph") + self.builder = None + + self.void_type = ir.VoidType() + self.int_type = ir.IntType(32) + self.int64_type = ir.IntType(64) + self.int8_type = ir.IntType(8) + self.double_type = ir.DoubleType() + self.bool_type = ir.IntType(1) + + self.int_ptr = self.int_type.as_pointer() + self.char_ptr = self.int8_type.as_pointer() + self.void_ptr = self.int8_type.as_pointer() + + self._create_structures() + + self._create_function_declarations() + + self._create_graph_functions() + + def _create_structures(self): + + self.node_type = ir.LiteralStructType([ + self.int_type, + self.char_ptr, + self.int_type, + self.void_ptr, + self.int_type, + self.int_type + ]) + + self.edge_type = ir.LiteralStructType([ + self.node_type.as_pointer(), + self.node_type.as_pointer(), + self.double_type + ]) + + self.hash_entry_type = ir.LiteralStructType([ + self.char_ptr, + self.int_type, + self.void_ptr, + self.void_ptr + ]) + + self.graph_type = ir.LiteralStructType([ + self.node_type.as_pointer().as_pointer(), + self.int_type, + self.int_type, + self.void_ptr, + self.void_ptr, + self.int_type + ]) + + def _create_function_declarations(self): + + malloc_type = ir.FunctionType(self.void_ptr, [self.int64_type]) + self.malloc_func = ir.Function(self.module, malloc_type, name="malloc") + + free_type = ir.FunctionType(self.void_type, [self.void_ptr]) + self.free_func = ir.Function(self.module, free_type, name="free") + + memcpy_type = 
ir.FunctionType(self.void_ptr, [self.void_ptr, self.void_ptr, self.int64_type]) + self.memcpy_func = ir.Function(self.module, memcpy_type, name="memcpy") + + strlen_type = ir.FunctionType(self.int64_type, [self.char_ptr]) + self.strlen_func = ir.Function(self.module, strlen_type, name="strlen") + + def _create_graph_functions(self): + self._create_hash_functions() + self._create_hash_insert() + self._create_node_functions() + self._create_graph_init() + self._create_add_vertex() + self._create_add_edge() + self._create_is_adjacent() + self._create_hash_remove() + self._create_remove_vertex() + self._create_remove_edge() + self._create_graph_cleanup() + + def _compare_strings(self, str1, str2, length): + """Compare two strings byte by byte""" + + same_ptr = self.builder.icmp_signed('==', str1, str2) + + true_block = self.builder.block.parent.append_basic_block(name="strings_equal") + false_block = self.builder.block.parent.append_basic_block(name="strings_not_equal") + compare_block = self.builder.block.parent.append_basic_block(name="compare_bytes") + merge_block = self.builder.block.parent.append_basic_block(name="string_cmp_merge") + + self.builder.cbranch(same_ptr, true_block, compare_block) + + self.builder.position_at_end(compare_block) + i = self.builder.alloca(self.int_type, name="str_cmp_i") + self.builder.store(ir.Constant(self.int_type, 0), i) + + loop_block = self.builder.block.parent.append_basic_block(name="str_cmp_loop") + check_block = self.builder.block.parent.append_basic_block(name="str_cmp_check") + + self.builder.branch(loop_block) + + self.builder.position_at_end(loop_block) + i_val = self.builder.load(i) + loop_condition = self.builder.icmp_signed('<', i_val, length) + self.builder.cbranch(loop_condition, check_block, true_block) + + self.builder.position_at_end(check_block) + char1_ptr = self.builder.gep(str1, [i_val]) + char2_ptr = self.builder.gep(str2, [i_val]) + char1 = self.builder.load(char1_ptr) + char2 = self.builder.load(char2_ptr) + 
+ chars_equal = self.builder.icmp_signed('==', char1, char2) + + next_char_block = self.builder.block.parent.append_basic_block(name="next_char") + self.builder.cbranch(chars_equal, next_char_block, false_block) + + self.builder.position_at_end(next_char_block) + next_i = self.builder.add(i_val, ir.Constant(self.int_type, 1)) + self.builder.store(next_i, i) + self.builder.branch(loop_block) + + self.builder.position_at_end(true_block) + result_true = ir.Constant(self.bool_type, 1) + self.builder.branch(merge_block) + + self.builder.position_at_end(false_block) + result_false = ir.Constant(self.bool_type, 0) + self.builder.branch(merge_block) + + self.builder.position_at_end(merge_block) + phi = self.builder.phi(self.bool_type, name="string_cmp_result") + phi.add_incoming(result_true, true_block) + phi.add_incoming(result_false, false_block) + + return phi + + def _create_hash_functions(self): + hash_func_type = ir.FunctionType(self.int_type, [self.char_ptr, self.int_type]) + self.hash_func = ir.Function(self.module, hash_func_type, name="hash_string") + + block = self.hash_func.append_basic_block(name="entry") + self.builder = ir.IRBuilder(block) + + str_ptr, str_len = self.hash_func.args + hash_val = self.builder.alloca(self.int_type, name="hash") + self.builder.store(ir.Constant(self.int_type, 5381), hash_val) + + i = self.builder.alloca(self.int_type, name="i") + self.builder.store(ir.Constant(self.int_type, 0), i) + + loop_block = self.hash_func.append_basic_block(name="loop") + end_block = self.hash_func.append_basic_block(name="end") + + self.builder.branch(loop_block) + self.builder.position_at_end(loop_block) + + i_val = self.builder.load(i) + cond = self.builder.icmp_signed('<', i_val, str_len) + + loop_body = self.hash_func.append_basic_block(name="loop_body") + self.builder.cbranch(cond, loop_body, end_block) + + self.builder.position_at_end(loop_body) + char_ptr = self.builder.gep(str_ptr, [i_val]) + char_val = self.builder.load(char_ptr) + char_ext = 
self.builder.zext(char_val, self.int_type) + + hash_current = self.builder.load(hash_val) + hash_shifted = self.builder.shl(hash_current, ir.Constant(self.int_type, 5)) + hash_new = self.builder.add(hash_shifted, hash_current) + hash_final = self.builder.add(hash_new, char_ext) + self.builder.store(hash_final, hash_val) + + i_next = self.builder.add(i_val, ir.Constant(self.int_type, 1)) + self.builder.store(i_next, i) + self.builder.branch(loop_block) + + self.builder.position_at_end(end_block) + result = self.builder.load(hash_val) + self.builder.ret(result) + + lookup_func_type = ir.FunctionType(self.void_ptr, [self.void_ptr, self.char_ptr, self.int_type]) + self.hash_lookup = ir.Function(self.module, lookup_func_type, name="hash_lookup") + + block = self.hash_lookup.append_basic_block(name="entry") + self.builder = ir.IRBuilder(block) + + table, key, key_len = self.hash_lookup.args + + current = self.builder.alloca(self.void_ptr) + self.builder.store(table, current) + + loop_block = self.hash_lookup.append_basic_block(name="loop") + check_block = self.hash_lookup.append_basic_block(name="check") + found_block = self.hash_lookup.append_basic_block(name="found") + not_found_block = self.hash_lookup.append_basic_block(name="not_found") + + self.builder.branch(loop_block) + self.builder.position_at_end(loop_block) + + current_val = self.builder.load(current) + is_null = self.builder.icmp_signed('==', current_val, ir.Constant(self.void_ptr, None)) + self.builder.cbranch(is_null, not_found_block, check_block) + + self.builder.position_at_end(check_block) + entry_ptr = self.builder.bitcast(current_val, self.hash_entry_type.as_pointer()) + entry_key_ptr = self.builder.gep(entry_ptr, [ir.Constant(self.int_type, 0), ir.Constant(self.int_type, 0)]) + entry_key = self.builder.load(entry_key_ptr) + entry_key_len_ptr = self.builder.gep(entry_ptr, [ir.Constant(self.int_type, 0), ir.Constant(self.int_type, 1)]) + entry_key_len = self.builder.load(entry_key_len_ptr) + + 
len_match = self.builder.icmp_signed('==', entry_key_len, key_len) + + next_entry_ptr = self.builder.gep(entry_ptr, [ir.Constant(self.int_type, 0), ir.Constant(self.int_type, 3)]) + next_entry = self.builder.load(next_entry_ptr) + self.builder.store(next_entry, current) + + content_check_block = self.hash_lookup.append_basic_block(name="content_check") + self.builder.cbranch(len_match, content_check_block, loop_block) + + self.builder.position_at_end(content_check_block) + strings_match = self._compare_strings(entry_key, key, key_len) + self.builder.cbranch(strings_match, found_block, loop_block) + + self.builder.position_at_end(found_block) + value_ptr = self.builder.gep(entry_ptr, [ir.Constant(self.int_type, 0), ir.Constant(self.int_type, 2)]) + value = self.builder.load(value_ptr) + self.builder.ret(value) + + self.builder.position_at_end(not_found_block) + self.builder.ret(ir.Constant(self.void_ptr, None)) + + def _create_node_functions(self): + create_node_type = ir.FunctionType(self.node_type.as_pointer(), [self.char_ptr, self.int_type, self.int_type]) + self.create_node = ir.Function(self.module, create_node_type, name="create_node") + + block = self.create_node.append_basic_block(name="entry") + self.builder = ir.IRBuilder(block) + + name_ptr, name_len, node_id = self.create_node.args + + node_size = ir.Constant(self.int64_type, self.node_type.get_abi_size(llvm.create_target_data(""))) + node_mem = self.builder.call(self.malloc_func, [node_size]) + node_ptr = self.builder.bitcast(node_mem, self.node_type.as_pointer()) + + id_ptr = self.builder.gep(node_ptr, [ir.Constant(self.int_type, 0), ir.Constant(self.int_type, 0)]) + self.builder.store(node_id, id_ptr) + + name_field_ptr = self.builder.gep(node_ptr, [ir.Constant(self.int_type, 0), ir.Constant(self.int_type, 1)]) + self.builder.store(name_ptr, name_field_ptr) + + name_len_ptr = self.builder.gep(node_ptr, [ir.Constant(self.int_type, 0), ir.Constant(self.int_type, 2)]) + self.builder.store(name_len, 
name_len_ptr) + + adj_ptr = self.builder.gep(node_ptr, [ir.Constant(self.int_type, 0), ir.Constant(self.int_type, 3)]) + self.builder.store(ir.Constant(self.void_ptr, None), adj_ptr) + + adj_count_ptr = self.builder.gep(node_ptr, [ir.Constant(self.int_type, 0), ir.Constant(self.int_type, 4)]) + self.builder.store(ir.Constant(self.int_type, 0), adj_count_ptr) + adj_cap_ptr = self.builder.gep(node_ptr, [ir.Constant(self.int_type, 0), ir.Constant(self.int_type, 5)]) + self.builder.store(ir.Constant(self.int_type, 0), adj_cap_ptr) + + self.builder.ret(node_ptr) + + def _create_graph_init(self): + init_type = ir.FunctionType(self.graph_type.as_pointer(), []) + self.graph_init = ir.Function(self.module, init_type, name="graph_init") + + block = self.graph_init.append_basic_block(name="entry") + self.builder = ir.IRBuilder(block) + + graph_size = ir.Constant(self.int64_type, self.graph_type.get_abi_size(llvm.create_target_data(""))) + graph_mem = self.builder.call(self.malloc_func, [graph_size]) + graph_ptr = self.builder.bitcast(graph_mem, self.graph_type.as_pointer()) + + nodes_ptr = self.builder.gep(graph_ptr, [ir.Constant(self.int_type, 0), ir.Constant(self.int_type, 0)]) + self.builder.store(ir.Constant(self.node_type.as_pointer().as_pointer(), None), nodes_ptr) + + count_ptr = self.builder.gep(graph_ptr, [ir.Constant(self.int_type, 0), ir.Constant(self.int_type, 1)]) + self.builder.store(ir.Constant(self.int_type, 0), count_ptr) + + cap_ptr = self.builder.gep(graph_ptr, [ir.Constant(self.int_type, 0), ir.Constant(self.int_type, 2)]) + self.builder.store(ir.Constant(self.int_type, 0), cap_ptr) + + node_map_ptr = self.builder.gep(graph_ptr, [ir.Constant(self.int_type, 0), ir.Constant(self.int_type, 3)]) + self.builder.store(ir.Constant(self.void_ptr, None), node_map_ptr) + + edge_map_ptr = self.builder.gep(graph_ptr, [ir.Constant(self.int_type, 0), ir.Constant(self.int_type, 4)]) + self.builder.store(ir.Constant(self.void_ptr, None), edge_map_ptr) + + next_id_ptr = 
self.builder.gep(graph_ptr, [ir.Constant(self.int_type, 0), ir.Constant(self.int_type, 5)])
self.builder.store(ir.Constant(self.int_type, 0), next_id_ptr)

self.builder.ret(graph_ptr)
# NOTE: the statements above close a definition that begins before this
# chunk; they are carried over unchanged.


def _create_add_vertex(self):
    """Emit the IR function ``add_vertex(graph*, name, name_len) -> int``.

    The generated function returns -1 when ``hash_lookup`` already finds the
    name in the map whose head is graph field 3.  Otherwise it copies the
    name into a freshly allocated, NUL-terminated buffer, builds a node with
    ``create_node``, appends the node pointer to the array in graph field 0
    (growing the array when count >= capacity), registers the node through
    ``hash_insert`` and returns 0.
    """
    fn_type = ir.FunctionType(
        self.int_type,
        [self.graph_type.as_pointer(), self.char_ptr, self.int_type],
    )
    self.add_vertex = ir.Function(self.module, fn_type, name="add_vertex")
    fn = self.add_vertex

    self.builder = ir.IRBuilder(fn.append_basic_block(name="entry"))
    b = self.builder
    i32, i64 = self.int_type, self.int64_type
    null = ir.Constant(self.void_ptr, None)

    def const(value):
        return ir.Constant(i32, value)

    def field(struct_ptr, index):
        return b.gep(struct_ptr, [const(0), const(index)])

    graph_ptr, name_ptr, name_len = fn.args

    # Graph field 3 holds the head pointer used by hash_lookup/hash_insert.
    map_head_ptr = field(graph_ptr, 3)
    map_head = b.load(map_head_ptr)
    hit = b.call(self.hash_lookup, [map_head, name_ptr, name_len])

    duplicate_bb = fn.append_basic_block(name="node_exists")
    fresh_bb = fn.append_basic_block(name="create_node")

    b.cbranch(b.icmp_signed('==', hit, null), fresh_bb, duplicate_bb)

    # The name is already registered: report -1.
    b.position_at_end(duplicate_bb)
    b.ret(const(-1))

    b.position_at_end(fresh_bb)

    # Use the value in graph field 5 as this node's id, then store id + 1.
    id_counter_ptr = field(graph_ptr, 5)
    node_id = b.load(id_counter_ptr)
    b.store(b.add(node_id, const(1)), id_counter_ptr)

    # Private copy of the name: name_len bytes followed by a NUL byte.
    buf_len = b.add(name_len, const(1))
    buf_raw = b.call(self.malloc_func, [b.zext(buf_len, i64)])
    buf = b.bitcast(buf_raw, self.char_ptr)
    b.call(self.memcpy_func, [buf_raw, name_ptr, b.zext(name_len, i64)])
    b.store(ir.Constant(self.int8_type, 0), b.gep(buf, [name_len]))

    node = b.call(self.create_node, [buf, name_len, node_id])

    array_ptr = field(graph_ptr, 0)
    count_ptr = field(graph_ptr, 1)
    cap_ptr = field(graph_ptr, 2)

    count = b.load(count_ptr)
    cap = b.load(cap_ptr)

    grow_bb = fn.append_basic_block(name="resize_array")
    append_bb = fn.append_basic_block(name="add_node")

    b.cbranch(b.icmp_signed('>=', count, cap), grow_bb, append_bb)

    # Array is full: capacity becomes 4 when it was 0, otherwise it doubles.
    b.position_at_end(grow_bb)
    cap_is_zero = b.icmp_signed('==', cap, const(0))
    doubled = b.mul(cap, const(2))
    grown_cap = b.select(cap_is_zero, const(4), doubled)

    slot_bytes = ir.Constant(i64, 8)  # every slot is sized as 8 bytes
    grown_bytes = b.mul(b.zext(grown_cap, i64), slot_bytes)
    grown_raw = b.call(self.malloc_func, [grown_bytes])
    grown = b.bitcast(grown_raw, self.node_type.as_pointer().as_pointer())

    copy_bb = fn.append_basic_block(name="copy_nodes")
    install_bb = fn.append_basic_block(name="no_copy")

    b.cbranch(b.icmp_signed('>', count, const(0)), copy_bb, install_bb)

    # Move the existing entries across and release the old storage.
    b.position_at_end(copy_bb)
    old = b.load(array_ptr)
    old_bytes = b.mul(b.zext(count, i64), slot_bytes)
    old_raw = b.bitcast(old, self.void_ptr)
    grown_as_void = b.bitcast(grown, self.void_ptr)
    b.call(self.memcpy_func, [grown_as_void, old_raw, old_bytes])
    b.call(self.free_func, [old_raw])
    b.branch(install_bb)

    b.position_at_end(install_bb)
    b.store(grown, array_ptr)
    b.store(grown_cap, cap_ptr)
    b.branch(append_bb)

    # This block is reached with and without a resize, so the array pointer
    # and the count are read back from memory.
    b.position_at_end(append_bb)
    array = b.load(array_ptr)
    slot_index = b.load(count_ptr)
    b.store(node, b.gep(array, [slot_index]))
    b.store(b.add(slot_index, const(1)), count_ptr)

    node_as_void = b.bitcast(node, self.void_ptr)
    b.call(self.hash_insert, [map_head_ptr, buf, name_len, node_as_void])

    b.ret(const(0))


def _create_add_edge(self):
    """Emit ``add_edge(graph*, src, src_len, tgt, tgt_len, weight) -> int``.

    The generated function returns -1 when the source name is not found by
    ``hash_lookup`` and -2 when the target name is not found.  Otherwise it
    builds the key ``<src>_<tgt>`` and looks it up in the map whose head is
    graph field 4.  If an edge is found, its weight (edge field 2) is
    overwritten and the temporary key is freed.  If not, a new edge record
    (fields: source node, target node, weight) is allocated and inserted
    under the key, and the target is appended to the source's adjacency
    list.  Both of these paths return 0.
    """
    fn_type = ir.FunctionType(
        self.int_type,
        [self.graph_type.as_pointer(), self.char_ptr, self.int_type,
         self.char_ptr, self.int_type, self.double_type],
    )
    self.add_edge = ir.Function(self.module, fn_type, name="add_edge")
    fn = self.add_edge

    self.builder = ir.IRBuilder(fn.append_basic_block(name="entry"))
    b = self.builder
    i32, i64 = self.int_type, self.int64_type
    null = ir.Constant(self.void_ptr, None)
    node_ptr_type = self.node_type.as_pointer()

    def const(value):
        return ir.Constant(i32, value)

    def field(struct_ptr, index):
        return b.gep(struct_ptr, [const(0), const(index)])

    graph_ptr, src_name, src_len, tgt_name, tgt_len, weight = fn.args

    node_map = b.load(field(graph_ptr, 3))

    src_hit = b.call(self.hash_lookup, [node_map, src_name, src_len])
    src_known = b.icmp_signed('!=', src_hit, null)

    src_ok_bb = fn.append_basic_block(name="src_found")
    no_src_bb = fn.append_basic_block(name="error")

    b.cbranch(src_known, src_ok_bb, no_src_bb)

    b.position_at_end(no_src_bb)
    b.ret(const(-1))

    b.position_at_end(src_ok_bb)
    tgt_hit = b.call(self.hash_lookup, [node_map, tgt_name, tgt_len])
    tgt_known = b.icmp_signed('!=', tgt_hit, null)

    tgt_ok_bb = fn.append_basic_block(name="tgt_found")
    no_tgt_bb = fn.append_basic_block(name="error2")

    b.cbranch(tgt_known, tgt_ok_bb, no_tgt_bb)

    b.position_at_end(no_tgt_bb)
    b.ret(const(-2))

    b.position_at_end(tgt_ok_bb)
    src_node = b.bitcast(src_hit, node_ptr_type)
    tgt_node = b.bitcast(tgt_hit, node_ptr_type)

    # Key layout: <src bytes> '_' <tgt bytes> NUL, i.e. src_len + tgt_len + 2.
    key_bytes = b.add(b.add(src_len, tgt_len), const(2))
    key_raw = b.call(self.malloc_func, [b.zext(key_bytes, i64)])
    key = b.bitcast(key_raw, self.char_ptr)

    b.call(self.memcpy_func, [key, src_name, b.zext(src_len, i64)])
    b.store(ir.Constant(self.int8_type, ord('_')), b.gep(key, [src_len]))

    tgt_dst = b.gep(key, [b.add(src_len, const(1))])
    b.call(self.memcpy_func, [tgt_dst, tgt_name, b.zext(tgt_len, i64)])

    key_len = b.sub(key_bytes, const(1))  # length without the NUL byte
    b.store(ir.Constant(self.int8_type, 0), b.gep(key, [key_len]))

    edge_map_ptr = field(graph_ptr, 4)
    edge_map = b.load(edge_map_ptr)
    edge_hit = b.call(self.hash_lookup, [edge_map, key, key_len])

    update_bb = fn.append_basic_block(name="edge_exists")
    create_bb = fn.append_basic_block(name="create_edge")

    b.cbranch(b.icmp_signed('!=', edge_hit, null), update_bb, create_bb)

    # Existing edge: overwrite its weight and drop the temporary key.
    b.position_at_end(update_bb)
    known_edge = b.bitcast(edge_hit, self.edge_type.as_pointer())
    b.store(weight, field(known_edge, 2))
    b.call(self.free_func, [key_raw])
    b.ret(const(0))

    # New edge: allocate the record and fill source, target and weight.
    b.position_at_end(create_bb)
    edge_bytes = ir.Constant(
        i64, self.edge_type.get_abi_size(llvm.create_target_data(""))
    )
    edge_raw = b.call(self.malloc_func, [edge_bytes])
    edge = b.bitcast(edge_raw, self.edge_type.as_pointer())

    b.store(src_node, field(edge, 0))
    b.store(tgt_node, field(edge, 1))
    b.store(weight, field(edge, 2))

    edge_as_void = b.bitcast(edge, self.void_ptr)
    b.call(self.hash_insert, [edge_map_ptr, key, key_len, edge_as_void])

    self._add_to_adjacency_list(src_node, tgt_node)

    self.builder.ret(const(0))


def _add_to_adjacency_list(self, src_node_ptr, tgt_node_ptr):
    """Emit IR at the current position appending a target to a node's list.

    Node field 3 is used as a raw byte buffer of 8-byte slots, field 4 as
    the slot count and field 5 as the capacity.  When count >= capacity a
    larger buffer is allocated (4 slots when the capacity was 0, double
    otherwise), existing slots are copied and the old buffer is freed.
    The builder is left positioned in the ``add_adj`` block.
    """
    b = self.builder
    fn = b.block.parent
    i32, i64 = self.int_type, self.int64_type

    def const(value):
        return ir.Constant(i32, value)

    def field(struct_ptr, index):
        return b.gep(struct_ptr, [const(0), const(index)])

    list_ptr = field(src_node_ptr, 3)
    count_ptr = field(src_node_ptr, 4)
    cap_ptr = field(src_node_ptr, 5)

    count = b.load(count_ptr)
    cap = b.load(cap_ptr)

    grow_bb = fn.append_basic_block(name="resize_adj")
    append_bb = fn.append_basic_block(name="add_adj")

    b.cbranch(b.icmp_signed('>=', count, cap), grow_bb, append_bb)

    b.position_at_end(grow_bb)
    cap_is_zero = b.icmp_signed('==', cap, const(0))
    doubled = b.mul(cap, const(2))
    grown_cap = b.select(cap_is_zero, const(4), doubled)

    slot_bytes = ir.Constant(i64, 8)
    grown_bytes = b.mul(b.zext(grown_cap, i64), slot_bytes)
    grown_raw = b.call(self.malloc_func, [grown_bytes])
    grown = b.bitcast(grown_raw, self.void_ptr)

    copy_bb = fn.append_basic_block(name="copy_adj")
    install_bb = fn.append_basic_block(name="no_copy_adj")

    b.cbranch(b.icmp_signed('>', count, const(0)), copy_bb, install_bb)

    b.position_at_end(copy_bb)
    old = b.load(list_ptr)
    old_bytes = b.mul(b.zext(count, i64), slot_bytes)
    b.call(self.memcpy_func, [grown, old, old_bytes])
    b.call(self.free_func, [old])
    b.branch(install_bb)

    b.position_at_end(install_bb)
    b.store(grown, list_ptr)
    b.store(grown_cap, cap_ptr)
    b.branch(append_bb)

    # The slot address is a byte offset (index * 8) into the raw buffer.
    b.position_at_end(append_bb)
    buffer = b.load(list_ptr)
    slot_index = b.load(count_ptr)
    byte_offset = b.mul(b.zext(slot_index, i64), slot_bytes)
    slot_raw = b.gep(buffer, [byte_offset])
    slot = b.bitcast(slot_raw, self.node_type.as_pointer().as_pointer())
    b.store(tgt_node_ptr, slot)

    b.store(b.add(slot_index, const(1)), count_ptr)


def _create_hash_insert(self):
    """Emit ``hash_insert(table**, key, key_len, value) -> int``.

    The generated function allocates one entry, stores key, key length and
    value in entry fields 0-2, links the current head of the table as the
    entry's field 3, makes the entry the new head and returns 0.  It performs
    no lookup for an existing key.
    """
    fn_type = ir.FunctionType(
        self.int_type,
        [self.void_ptr.as_pointer(), self.char_ptr, self.int_type, self.void_ptr],
    )
    self.hash_insert = ir.Function(self.module, fn_type, name="hash_insert")
    fn = self.hash_insert

    self.builder = ir.IRBuilder(fn.append_basic_block(name="entry"))
    b = self.builder
    i32 = self.int_type

    def const(value):
        return ir.Constant(i32, value)

    def field(struct_ptr, index):
        return b.gep(struct_ptr, [const(0), const(index)])

    table_ptr, key, key_len, value = fn.args

    entry_bytes = ir.Constant(
        self.int64_type,
        self.hash_entry_type.get_abi_size(llvm.create_target_data("")),
    )
    entry_raw = b.call(self.malloc_func, [entry_bytes])
    entry = b.bitcast(entry_raw, self.hash_entry_type.as_pointer())

    b.store(key, field(entry, 0))
    b.store(key_len, field(entry, 1))
    b.store(value, field(entry, 2))

    # Prepend: the previous head becomes this entry's successor.
    previous_head = b.load(table_ptr)
    b.store(previous_head, field(entry, 3))

    b.store(b.bitcast(entry, self.void_ptr), table_ptr)

    b.ret(const(0))


def _create_is_adjacent(self):
    """Emit ``is_adjacent(graph*, name1, len1, name2, len2) -> bool``.

    The generated function returns 1 when both names are found by
    ``hash_lookup`` and the second node's pointer occurs in one of the
    first node's adjacency slots.  It returns 0 when either lookup fails,
    when the first node's adjacency buffer pointer is null, or when the scan
    reaches the slot count without a match.
    """
    fn_type = ir.FunctionType(
        self.bool_type,
        [self.graph_type.as_pointer(), self.char_ptr, self.int_type,
         self.char_ptr, self.int_type],
    )
    self.is_adjacent = ir.Function(self.module, fn_type, name="is_adjacent")
    fn = self.is_adjacent

    self.builder = ir.IRBuilder(fn.append_basic_block(name="entry"))
    b = self.builder
    i32, i64 = self.int_type, self.int64_type
    null = ir.Constant(self.void_ptr, None)
    node_ptr_type = self.node_type.as_pointer()

    def const(value):
        return ir.Constant(i32, value)

    def field(struct_ptr, index):
        return b.gep(struct_ptr, [const(0), const(index)])

    graph_ptr, node1_name, node1_name_len, node2_name, node2_name_len = fn.args

    node_map = b.load(field(graph_ptr, 3))
    first_hit = b.call(self.hash_lookup, [node_map, node1_name, node1_name_len])

    first_ok_bb = fn.append_basic_block(name="node1_found")
    no_bb = fn.append_basic_block(name="return_false")

    b.cbranch(b.icmp_signed('!=', first_hit, null), first_ok_bb, no_bb)

    b.position_at_end(first_ok_bb)
    second_hit = b.call(self.hash_lookup, [node_map, node2_name, node2_name_len])
    second_known = b.icmp_signed('!=', second_hit, null)

    inspect_bb = fn.append_basic_block(name="check_adjacency")
    b.cbranch(second_known, inspect_bb, no_bb)

    b.position_at_end(inspect_bb)
    first_node = b.bitcast(first_hit, node_ptr_type)
    second_node = b.bitcast(second_hit, node_ptr_type)

    list_ptr = field(first_node, 3)
    count_ptr = field(first_node, 4)

    buffer = b.load(list_ptr)
    slot_count = b.load(count_ptr)

    has_buffer = b.icmp_signed('!=', buffer, null)

    scan_bb = fn.append_basic_block(name="search_adjacency")
    b.cbranch(has_buffer, scan_bb, no_bb)

    # Linear scan over the slots, with the index kept in a stack cell.
    b.position_at_end(scan_bb)
    index_cell = b.alloca(i32, name="adj_search_i")
    b.store(const(0), index_cell)

    head_bb = fn.append_basic_block(name="adj_search_loop")
    body_bb = fn.append_basic_block(name="adj_check_node")
    yes_bb = fn.append_basic_block(name="return_true")
    step_bb = fn.append_basic_block(name="adj_next")

    b.branch(head_bb)

    b.position_at_end(head_bb)
    index = b.load(index_cell)
    b.cbranch(b.icmp_signed('<', index, slot_count), body_bb, no_bb)

    b.position_at_end(body_bb)
    slot_bytes = ir.Constant(i64, 8)
    byte_offset = b.mul(b.zext(index, i64), slot_bytes)
    slot_raw = b.gep(buffer, [byte_offset])
    slot = b.bitcast(slot_raw, node_ptr_type.as_pointer())
    neighbour = b.load(slot)

    b.cbranch(b.icmp_signed('==', neighbour, second_node), yes_bb, step_bb)

    b.position_at_end(step_bb)
    b.store(b.add(index, const(1)), index_cell)
    b.branch(head_bb)

    b.position_at_end(yes_bb)
    b.ret(ir.Constant(self.bool_type, 1))

    b.position_at_end(no_bb)
    b.ret(ir.Constant(self.bool_type, 0))


def _create_remove_vertex(self):
    """Emit ``remove_vertex(graph*, name, name_len) -> int``.

    The generated function returns -1 when ``hash_lookup`` does not find the
    name.  Otherwise it removes the node from the graph's node array, calls
    ``hash_remove`` on the node map, emits the edge and adjacency clean-up
    produced by ``_remove_all_edges_for_vertex`` and
    ``_remove_from_all_adjacency_lists``, frees the node's adjacency buffer
    (when non-null), the pointer held in node field 1, and the node itself,
    and returns 0.
    """
    fn_type = ir.FunctionType(
        self.int_type,
        [self.graph_type.as_pointer(), self.char_ptr, self.int_type],
    )
    self.remove_vertex = ir.Function(self.module, fn_type, name="remove_vertex")
    fn = self.remove_vertex

    self.builder = ir.IRBuilder(fn.append_basic_block(name="entry"))
    b = self.builder
    i32 = self.int_type
    null = ir.Constant(self.void_ptr, None)

    def const(value):
        return ir.Constant(i32, value)

    def field(struct_ptr, index):
        return self.builder.gep(struct_ptr, [const(0), const(index)])

    graph_ptr, name_ptr, name_len = fn.args

    map_head_ptr = field(graph_ptr, 3)
    map_head = b.load(map_head_ptr)
    hit = b.call(self.hash_lookup, [map_head, name_ptr, name_len])

    found_bb = fn.append_basic_block(name="node_found")
    missing_bb = fn.append_basic_block(name="error")

    b.cbranch(b.icmp_signed('!=', hit, null), found_bb, missing_bb)

    b.position_at_end(missing_bb)
    b.ret(const(-1))

    b.position_at_end(found_bb)
    victim = b.bitcast(hit, self.node_type.as_pointer())

    # NOTE(review): node field 0 is loaded here but the value is not used
    # anywhere below.
    unused_field0 = b.load(field(victim, 0))

    self._remove_from_nodes_array(graph_ptr, victim)

    self.builder.call(self.hash_remove, [map_head_ptr, name_ptr, name_len])

    self._remove_all_edges_for_vertex(graph_ptr, name_ptr, name_len)

    self._remove_from_all_adjacency_lists(graph_ptr, name_ptr, name_len)

    # The helpers above move the insertion point; pick the builder up again.
    b = self.builder

    adj_buffer = b.load(field(victim, 3))
    has_buffer = b.icmp_signed('!=', adj_buffer, null)

    free_adj_bb = fn.append_basic_block(name="free_adj")
    free_node_bb = fn.append_basic_block(name="free_node")

    b.cbranch(has_buffer, free_adj_bb, free_node_bb)

    b.position_at_end(free_adj_bb)
    b.call(self.free_func, [adj_buffer])
    b.branch(free_node_bb)

    b.position_at_end(free_node_bb)
    stored_name = b.load(field(victim, 1))
    b.call(self.free_func, [b.bitcast(stored_name, self.void_ptr)])

    b.call(self.free_func, [b.bitcast(victim, self.void_ptr)])

    b.ret(const(0))


def _create_remove_edge(self):
    """Emit ``remove_edge(graph*, src, src_len, tgt, tgt_len) -> int``.

    The generated function returns -1 / -2 when the source / target name is
    not found by ``hash_lookup``.  Otherwise it rebuilds the key
    ``<src>_<tgt>``, frees the edge record if the edge map holds one, calls
    ``hash_remove`` for the key, emits the adjacency-list removal, frees the
    temporary key and returns 0.

    NOTE(review): 0 is returned whether or not an edge was found for the
    key -- confirm that callers do not need to tell these cases apart.
    """
    fn_type = ir.FunctionType(
        self.int_type,
        [self.graph_type.as_pointer(), self.char_ptr, self.int_type,
         self.char_ptr, self.int_type],
    )
    self.remove_edge = ir.Function(self.module, fn_type, name="remove_edge")
    fn = self.remove_edge

    self.builder = ir.IRBuilder(fn.append_basic_block(name="entry"))
    b = self.builder
    i32, i64 = self.int_type, self.int64_type
    null = ir.Constant(self.void_ptr, None)
    node_ptr_type = self.node_type.as_pointer()

    def const(value):
        return ir.Constant(i32, value)

    def field(struct_ptr, index):
        return b.gep(struct_ptr, [const(0), const(index)])

    graph_ptr, src_name, src_len, tgt_name, tgt_len = fn.args
    node_map = b.load(field(graph_ptr, 3))

    src_hit = b.call(self.hash_lookup, [node_map, src_name, src_len])
    src_known = b.icmp_signed('!=', src_hit, null)

    src_ok_bb = fn.append_basic_block(name="src_found")
    no_src_bb = fn.append_basic_block(name="error")

    b.cbranch(src_known, src_ok_bb, no_src_bb)
    b.position_at_end(no_src_bb)
    b.ret(const(-1))

    b.position_at_end(src_ok_bb)
    tgt_hit = b.call(self.hash_lookup, [node_map, tgt_name, tgt_len])
    tgt_known = b.icmp_signed('!=', tgt_hit, null)

    both_ok_bb = fn.append_basic_block(name="both_found")
    no_tgt_bb = fn.append_basic_block(name="error2")

    b.cbranch(tgt_known, both_ok_bb, no_tgt_bb)
    b.position_at_end(no_tgt_bb)
    b.ret(const(-2))

    b.position_at_end(both_ok_bb)
    src_node = b.bitcast(src_hit, node_ptr_type)
    tgt_node = b.bitcast(tgt_hit, node_ptr_type)

    # Key layout: <src bytes> '_' <tgt bytes> NUL, i.e. src_len + tgt_len + 2.
    key_bytes = b.add(b.add(src_len, tgt_len), const(2))
    key_raw = b.call(self.malloc_func, [b.zext(key_bytes, i64)])
    key = b.bitcast(key_raw, self.char_ptr)

    b.call(self.memcpy_func, [key, src_name, b.zext(src_len, i64)])
    b.store(ir.Constant(self.int8_type, ord('_')), b.gep(key, [src_len]))

    tgt_dst = b.gep(key, [b.add(src_len, const(1))])
    b.call(self.memcpy_func, [tgt_dst, tgt_name, b.zext(tgt_len, i64)])

    key_len = b.sub(key_bytes, const(1))  # length without the NUL byte
    b.store(ir.Constant(self.int8_type, 0), b.gep(key, [key_len]))

    edge_map_ptr = field(graph_ptr, 4)
    edge_map = b.load(edge_map_ptr)

    edge_hit = b.call(self.hash_lookup, [edge_map, key, key_len])
    edge_known = b.icmp_signed('!=', edge_hit, null)

    drop_bb = fn.append_basic_block(name="remove_edge_data")
    cleanup_bb = fn.append_basic_block(name="cleanup")

    b.cbranch(edge_known, drop_bb, cleanup_bb)

    b.position_at_end(drop_bb)
    b.call(self.free_func, [edge_hit])
    b.branch(cleanup_bb)

    b.position_at_end(cleanup_bb)
    b.call(self.hash_remove, [edge_map_ptr, key, key_len])

    self._remove_from_adjacency_list(src_node, tgt_node)

    self.builder.call(self.free_func, [key_raw])

    self.builder.ret(const(0))


def _remove_from_nodes_array(self, graph_ptr, node_to_remove):
    """Emit IR at the current position deleting a node from the node array.

    The emitted code scans the array in graph field 0 for the first slot
    equal to ``node_to_remove``.  On a match, later slots are shifted down by
    one and the count in graph field 1 is decremented.  Without a match
    nothing is written.  The builder is left in the ``done_remove`` block.
    """
    b = self.builder
    fn = b.block.parent
    i32 = self.int_type

    def const(value):
        return ir.Constant(i32, value)

    def field(struct_ptr, index):
        return b.gep(struct_ptr, [const(0), const(index)])

    array_ptr = field(graph_ptr, 0)
    count_ptr = field(graph_ptr, 1)

    array = b.load(array_ptr)
    count = b.load(count_ptr)

    scan_cell = b.alloca(i32, name="i")
    b.store(const(0), scan_cell)

    head_bb = fn.append_basic_block(name="find_loop")
    body_bb = fn.append_basic_block(name="check_node")
    hit_bb = fn.append_basic_block(name="found_node")
    shift_head_bb = fn.append_basic_block(name="shift_elements")
    exit_bb = fn.append_basic_block(name="done_remove")

    b.branch(head_bb)

    b.position_at_end(head_bb)
    index = b.load(scan_cell)
    b.cbranch(b.icmp_signed('<', index, count), body_bb, exit_bb)

    b.position_at_end(body_bb)
    candidate = b.load(b.gep(array, [index]))
    same = b.icmp_signed('==', candidate, node_to_remove)

    step_bb = fn.append_basic_block(name="next_iter")
    b.cbranch(same, hit_bb, step_bb)

    b.position_at_end(step_bb)
    b.store(b.add(index, const(1)), scan_cell)
    b.branch(head_bb)

    # Match found: start shifting from the matching index.
    b.position_at_end(hit_bb)
    shift_cell = b.alloca(i32, name="shift_i")
    b.store(index, shift_cell)
    b.branch(shift_head_bb)

    b.position_at_end(shift_head_bb)
    dst_index = b.load(shift_cell)
    src_index = b.add(dst_index, const(1))
    more = b.icmp_signed('<', src_index, count)

    shift_body_bb = fn.append_basic_block(name="do_shift")
    shift_done_bb = fn.append_basic_block(name="finish_shift")

    b.cbranch(more, shift_body_bb, shift_done_bb)

    b.position_at_end(shift_body_bb)
    src_slot = b.gep(array, [src_index])
    dst_slot = b.gep(array, [dst_index])
    b.store(b.load(src_slot), dst_slot)

    b.store(src_index, shift_cell)
    b.branch(shift_head_bb)

    b.position_at_end(shift_done_bb)
    b.store(b.sub(count, const(1)), count_ptr)
    b.branch(exit_bb)

    b.position_at_end(exit_bb)


def _remove_from_all_adjacency_lists(self, graph_ptr, vertex_name, vertex_name_len):
    """Emit IR at the current position visiting every node in the node array.

    For each slot below the count in graph field 1, the emitted loop body is
    whatever ``_remove_vertex_from_node_adjacency`` generates for that node
    and the given vertex name.  The builder is left in the
    ``done_adj_cleanup`` block.
    """
    b = self.builder
    fn = b.block.parent
    i32 = self.int_type

    def const(value):
        return ir.Constant(i32, value)

    def field(struct_ptr, index):
        return b.gep(struct_ptr, [const(0), const(index)])

    array_ptr = field(graph_ptr, 0)
    count_ptr = field(graph_ptr, 1)

    array = b.load(array_ptr)
    count = b.load(count_ptr)

    index_cell = b.alloca(i32, name="node_i")
    b.store(const(0), index_cell)

    head_bb = fn.append_basic_block(name="node_loop")
    body_bb = fn.append_basic_block(name="process_node")
    step_bb = fn.append_basic_block(name="next_node")
    exit_bb = fn.append_basic_block(name="done_adj_cleanup")

    b.branch(head_bb)

    b.position_at_end(head_bb)
    index = b.load(index_cell)
    b.cbranch(b.icmp_signed('<', index, count), body_bb, exit_bb)

    b.position_at_end(body_bb)
    node = b.load(b.gep(array, [index]))

    self._remove_vertex_from_node_adjacency(node, vertex_name, vertex_name_len)

    # Continue from wherever the helper left the insertion point.
    b = self.builder
    b.branch(step_bb)

    b.position_at_end(step_bb)
    b.store(b.add(index, const(1)), index_cell)
    b.branch(head_bb)

    b.position_at_end(exit_bb)


def _remove_from_adjacency_list(self, src_node_ptr, tgt_node_ptr):
    """Emit IR at the current position deleting a target from a node's list.

    The emitted code scans the 8-byte slots of the buffer in node field 3 for
    the first slot equal to ``tgt_node_ptr``.  On a match, later slots are
    shifted down by one and the count in node field 4 is decremented.
    Without a match nothing is written.  The builder is left in the
    ``adj_done`` block.
    """
    b = self.builder
    fn = b.block.parent
    i32, i64 = self.int_type, self.int64_type
    slot_type = self.node_type.as_pointer().as_pointer()

    def const(value):
        return ir.Constant(i32, value)

    def field(struct_ptr, index):
        return b.gep(struct_ptr, [const(0), const(index)])

    list_ptr = field(src_node_ptr, 3)
    count_ptr = field(src_node_ptr, 4)

    buffer = b.load(list_ptr)
    slot_count = b.load(count_ptr)

    scan_cell = b.alloca(i32, name="adj_i")
    b.store(const(0), scan_cell)

    head_bb = fn.append_basic_block(name="adj_find_loop")
    body_bb = fn.append_basic_block(name="adj_check")
    hit_bb = fn.append_basic_block(name="adj_found")
    shift_head_bb = fn.append_basic_block(name="adj_shift")
    step_bb = fn.append_basic_block(name="adj_next")
    exit_bb = fn.append_basic_block(name="adj_done")

    b.branch(head_bb)

    b.position_at_end(head_bb)
    index = b.load(scan_cell)
    b.cbranch(b.icmp_signed('<', index, slot_count), body_bb, exit_bb)

    b.position_at_end(body_bb)
    slot_bytes = ir.Constant(i64, 8)
    byte_offset = b.mul(b.zext(index, i64), slot_bytes)
    slot_raw = b.gep(buffer, [byte_offset])
    neighbour = b.load(b.bitcast(slot_raw, slot_type))

    b.cbranch(b.icmp_signed('==', neighbour, tgt_node_ptr), hit_bb, step_bb)

    # Match found: start shifting from the matching index.
    b.position_at_end(hit_bb)
    shift_cell = b.alloca(i32, name="adj_shift_i")
    b.store(index, shift_cell)
    b.branch(shift_head_bb)

    b.position_at_end(shift_head_bb)
    dst_index = b.load(shift_cell)
    src_index = b.add(dst_index, const(1))
    more = b.icmp_signed('<', src_index, slot_count)

    shift_body_bb = fn.append_basic_block(name="do_adj_shift")
    shift_done_bb = fn.append_basic_block(name="finish_adj_shift")

    b.cbranch(more, shift_body_bb, shift_done_bb)

    b.position_at_end(shift_body_bb)
    src_offset = b.mul(b.zext(src_index, i64), slot_bytes)
    dst_offset = b.mul(b.zext(dst_index, i64), slot_bytes)

    src_raw = b.gep(buffer, [src_offset])
    dst_raw = b.gep(buffer, [dst_offset])

    src_slot = b.bitcast(src_raw, slot_type)
    dst_slot = b.bitcast(dst_raw, slot_type)

    b.store(b.load(src_slot), dst_slot)

    b.store(src_index, shift_cell)
    b.branch(shift_head_bb)

    b.position_at_end(shift_done_bb)
    b.store(b.sub(slot_count, const(1)), count_ptr)
    b.branch(exit_bb)

    b.position_at_end(step_bb)
    b.store(b.add(index, const(1)), scan_cell)
    b.branch(head_bb)

    b.position_at_end(exit_bb)


def _create_hash_remove(self):
    """Emit ``hash_remove(table**, key, key_len) -> int``.

    The generated function walks the chain starting at ``*table``.  An entry
    matches when its stored key length equals ``key_len`` and
    ``_compare_strings`` reports a match.  The matching entry is unlinked by
    writing its successor through the remembered link pointer, the entry is
    freed, and 0 is returned.  -1 is returned for an empty table or when no
    entry matches.  Only the entry itself is passed to ``free``.
    """
    fn_type = ir.FunctionType(
        self.int_type,
        [self.void_ptr.as_pointer(), self.char_ptr, self.int_type],
    )
    self.hash_remove = ir.Function(self.module, fn_type, name="hash_remove")
    fn = self.hash_remove

    self.builder = ir.IRBuilder(fn.append_basic_block(name="entry"))
    b = self.builder
    i32 = self.int_type
    null = ir.Constant(self.void_ptr, None)

    def const(value):
        return ir.Constant(i32, value)

    def field(struct_ptr, index):
        return self.builder.gep(struct_ptr, [const(0), const(index)])

    table_ptr, key, key_len = fn.args

    head = b.load(table_ptr)
    head_is_null = b.icmp_signed('==', head, null)

    empty_bb = fn.append_basic_block(name="empty_table")
    search_bb = fn.append_basic_block(name="search_table")

    b.cbranch(head_is_null, empty_bb, search_bb)

    b.position_at_end(empty_bb)
    b.ret(const(-1))

    # cursor_cell holds the entry under inspection; link_cell holds the
    # address of the pointer that currently refers to it.
    b.position_at_end(search_bb)
    cursor_cell = b.alloca(self.void_ptr, name="current")
    link_cell = b.alloca(self.void_ptr.as_pointer(), name="prev")

    b.store(head, cursor_cell)
    b.store(table_ptr, link_cell)

    loop_bb = fn.append_basic_block(name="search_loop")
    inspect_bb = fn.append_basic_block(name="check_key")
    hit_bb = fn.append_basic_block(name="found_entry")
    miss_bb = fn.append_basic_block(name="not_found")
    advance_bb = fn.append_basic_block(name="next_entry")

    b.branch(loop_bb)

    b.position_at_end(loop_bb)
    cursor = b.load(cursor_cell)
    b.cbranch(b.icmp_signed('==', cursor, null), miss_bb, inspect_bb)

    b.position_at_end(inspect_bb)
    entry = b.bitcast(cursor, self.hash_entry_type.as_pointer())

    stored_key = b.load(field(entry, 0))
    stored_len = b.load(field(entry, 1))

    same_len = b.icmp_signed('==', stored_len, key_len)
    compare_bb = fn.append_basic_block(name="content_check")

    b.cbranch(same_len, compare_bb, advance_bb)

    b.position_at_end(compare_bb)
    same_text = self._compare_strings(stored_key, key, key_len)
    b = self.builder  # continue from wherever the helper left off
    b.cbranch(same_text, hit_bb, advance_bb)

    # Unlink: whatever pointed at this entry now points at its successor.
    b.position_at_end(hit_bb)
    successor = b.load(field(entry, 3))
    link = b.load(link_cell)
    b.store(successor, link)

    b.call(self.free_func, [cursor])

    b.ret(const(0))

    b.position_at_end(advance_bb)
    following = b.load(field(entry, 3))

    # NOTE(review): this computes the address of field 3 a second time; the
    # pointer used for the load above has the same operands.
    next_link = field(entry, 3)
    b.store(next_link, link_cell)
    b.store(following, cursor_cell)
    b.branch(loop_bb)

    b.position_at_end(miss_bb)
    b.ret(const(-1))


# NOTE: only the opening part of the following method is visible in this
# chunk (it continues past the end); the visible statements are carried over
# unchanged.
def _remove_all_edges_for_vertex(self, graph_ptr, vertex_name, vertex_name_len):

    edge_map_ptr = self.builder.gep(graph_ptr, [ir.Constant(self.int_type, 0), ir.Constant(self.int_type, 4)])

    edges_to_remove = self.builder.alloca(self.void_ptr, name="edges_to_remove_list")
    self.builder.store(ir.Constant(self.void_ptr, None), edges_to_remove)

    edge_map = self.builder.load(edge_map_ptr)
    current_entry = self.builder.alloca(self.void_ptr, name="current_entry")
    self.builder.store(edge_map, current_entry)

    collect_loop_block = self.builder.block.parent.append_basic_block(name="collect_edge_loop")
    check_collect_block = self.builder.block.parent.append_basic_block(name="check_collect_edge")
    add_to_remove_list_block = self.builder.block.parent.append_basic_block(name="add_to_remove_list")
    next_collect_block = self.builder.block.parent.append_basic_block(name="next_collect_edge")
    removal_phase_block = self.builder.block.parent.append_basic_block(name="removal_phase")

    self.builder.branch(collect_loop_block)

    self.builder.position_at_end(collect_loop_block)
    current_entry_val = self.builder.load(current_entry)
    is_null = self.builder.icmp_signed('==', current_entry_val, ir.Constant(self.void_ptr, None))
    self.builder.cbranch(is_null, removal_phase_block, check_collect_block)

    self.builder.position_at_end(check_collect_block)
    entry_ptr = self.builder.bitcast(current_entry_val, self.hash_entry_type.as_pointer())

    edge_key_ptr = self.builder.gep(entry_ptr, [ir.Constant(self.int_type, 0), ir.Constant(self.int_type, 0)])
    edge_key = self.builder.load(edge_key_ptr)
    edge_key_len_ptr = self.builder.gep(entry_ptr, [ir.Constant(self.int_type, 0), ir.Constant(self.int_type, 1)])
    edge_key_len = self.builder.load(edge_key_len_ptr)

    contains_vertex = self._string_contains_substring(edge_key, edge_key_len, vertex_name, vertex_name_len)
    self.builder.cbranch(contains_vertex, add_to_remove_list_block, next_collect_block)

    self.builder.position_at_end(add_to_remove_list_block)

    list_node_size = ir.Constant(self.int64_type, 24)
    list_node_mem = self.builder.call(self.malloc_func, [list_node_size])

    key_ptr_field = self.builder.bitcast(list_node_mem, self.char_ptr.as_pointer())
    self.builder.store(edge_key, key_ptr_field)

    key_len_offset = self.builder.gep(list_node_mem, [ir.Constant(self.int64_type, 8)])
    key_len_field = self.builder.bitcast(key_len_offset, self.int_type.as_pointer())
    self.builder.store(edge_key_len, key_len_field)

    next_offset = self.builder.gep(list_node_mem, [ir.Constant(self.int64_type, 16)])
    next_field = self.builder.bitcast(next_offset, self.void_ptr.as_pointer())
    old_head = self.builder.load(edges_to_remove)
    self.builder.store(old_head, next_field)

    self.builder.store(list_node_mem, edges_to_remove)
    self.builder.branch(next_collect_block)

    self.builder.position_at_end(next_collect_block)
    next_ptr = self.builder.gep(entry_ptr, [ir.Constant(self.int_type, 0), ir.Constant(self.int_type, 3)])
    next_entry = self.builder.load(next_ptr)
    self.builder.store(next_entry, current_entry)
self.builder.branch(collect_loop_block) + + self.builder.position_at_end(removal_phase_block) + + removal_loop_block = self.builder.block.parent.append_basic_block(name="removal_loop") + process_removal_block = self.builder.block.parent.append_basic_block(name="process_removal") + done_edge_cleanup = self.builder.block.parent.append_basic_block(name="done_edge_cleanup") + + self.builder.branch(removal_loop_block) + + self.builder.position_at_end(removal_loop_block) + current_remove_node = self.builder.load(edges_to_remove) + is_done = self.builder.icmp_signed('==', current_remove_node, ir.Constant(self.void_ptr, None)) + self.builder.cbranch(is_done, done_edge_cleanup, process_removal_block) + + self.builder.position_at_end(process_removal_block) + + key_from_node_ptr = self.builder.bitcast(current_remove_node, self.char_ptr.as_pointer()) + key_from_node = self.builder.load(key_from_node_ptr) + + key_len_offset = self.builder.gep(current_remove_node, [ir.Constant(self.int64_type, 8)]) + key_len_from_node_ptr = self.builder.bitcast(key_len_offset, self.int_type.as_pointer()) + key_len_from_node = self.builder.load(key_len_from_node_ptr) + + next_offset = self.builder.gep(current_remove_node, [ir.Constant(self.int64_type, 16)]) + next_remove_node_ptr = self.builder.bitcast(next_offset, self.void_ptr.as_pointer()) + next_remove_node = self.builder.load(next_remove_node_ptr) + self.builder.store(next_remove_node, edges_to_remove) + + edge_to_free = self.builder.call(self.hash_lookup, [edge_map, key_from_node, key_len_from_node]) + edge_exists = self.builder.icmp_signed('!=', edge_to_free, ir.Constant(self.void_ptr, None)) + + free_edge_block = self.builder.block.parent.append_basic_block(name="free_edge_obj") + remove_from_hash_block = self.builder.block.parent.append_basic_block(name="remove_from_hash") + + self.builder.cbranch(edge_exists, free_edge_block, remove_from_hash_block) + + self.builder.position_at_end(free_edge_block) + self.builder.call(self.free_func, 
[edge_to_free]) + self.builder.branch(remove_from_hash_block) + + self.builder.position_at_end(remove_from_hash_block) + self.builder.call(self.hash_remove, [edge_map_ptr, key_from_node, key_len_from_node]) + + self.builder.call(self.free_func, [current_remove_node]) + + self.builder.branch(removal_loop_block) + + self.builder.position_at_end(done_edge_cleanup) + + def _remove_vertex_from_node_adjacency(self, node_ptr, vertex_name, vertex_name_len): + + adj_list_ptr = self.builder.gep(node_ptr, [ir.Constant(self.int_type, 0), ir.Constant(self.int_type, 3)]) + adj_count_ptr = self.builder.gep(node_ptr, [ir.Constant(self.int_type, 0), ir.Constant(self.int_type, 4)]) + + adj_list = self.builder.load(adj_list_ptr) + adj_count = self.builder.load(adj_count_ptr) + + adj_exists = self.builder.icmp_signed('!=', adj_list, ir.Constant(self.void_ptr, None)) + + process_adj_block = self.builder.block.parent.append_basic_block(name="process_adj_list") + skip_adj_block = self.builder.block.parent.append_basic_block(name="skip_adj_list") + + self.builder.cbranch(adj_exists, process_adj_block, skip_adj_block) + + self.builder.position_at_end(process_adj_block) + + write_index = self.builder.alloca(self.int_type, name="write_idx") + self.builder.store(ir.Constant(self.int_type, 0), write_index) + + i = self.builder.alloca(self.int_type, name="read_idx") + self.builder.store(ir.Constant(self.int_type, 0), i) + + compact_loop_block = self.builder.block.parent.append_basic_block(name="compact_loop") + check_vertex_block = self.builder.block.parent.append_basic_block(name="check_vertex") + keep_vertex_block = self.builder.block.parent.append_basic_block(name="keep_vertex") + skip_vertex_block = self.builder.block.parent.append_basic_block(name="skip_vertex") + update_count_block = self.builder.block.parent.append_basic_block(name="update_count") + + self.builder.branch(compact_loop_block) + + self.builder.position_at_end(compact_loop_block) + i_val = self.builder.load(i) + 
loop_condition = self.builder.icmp_signed('<', i_val, adj_count) + self.builder.cbranch(loop_condition, check_vertex_block, update_count_block) + + self.builder.position_at_end(check_vertex_block) + ptr_size = ir.Constant(self.int64_type, 8) + read_offset_64 = self.builder.mul(self.builder.zext(i_val, self.int64_type), ptr_size) + read_entry_ptr = self.builder.gep(adj_list, [read_offset_64]) + read_entry_typed = self.builder.bitcast(read_entry_ptr, self.node_type.as_pointer().as_pointer()) + adj_node = self.builder.load(read_entry_typed) + + adj_node_name_ptr = self.builder.gep(adj_node, [ir.Constant(self.int_type, 0), ir.Constant(self.int_type, 1)]) + adj_node_name = self.builder.load(adj_node_name_ptr) + adj_node_name_len_ptr = self.builder.gep(adj_node, [ir.Constant(self.int_type, 0), ir.Constant(self.int_type, 2)]) + adj_node_name_len = self.builder.load(adj_node_name_len_ptr) + + len_match = self.builder.icmp_signed('==', adj_node_name_len, vertex_name_len) + + content_cmp_block = self.builder.block.parent.append_basic_block(name="content_cmp") + next_read_block = self.builder.block.parent.append_basic_block(name="next_read") + + self.builder.cbranch(len_match, content_cmp_block, keep_vertex_block) + + self.builder.position_at_end(content_cmp_block) + names_match = self._compare_strings(adj_node_name, vertex_name, vertex_name_len) + self.builder.cbranch(names_match, skip_vertex_block, keep_vertex_block) + + self.builder.position_at_end(keep_vertex_block) + write_idx_val = self.builder.load(write_index) + + indices_different = self.builder.icmp_signed('!=', i_val, write_idx_val) + + do_copy_block = self.builder.block.parent.append_basic_block(name="do_copy") + advance_write_block = self.builder.block.parent.append_basic_block(name="advance_write") + + self.builder.cbranch(indices_different, do_copy_block, advance_write_block) + + self.builder.position_at_end(do_copy_block) + write_offset_64 = self.builder.mul(self.builder.zext(write_idx_val, self.int64_type), 
ptr_size) + write_entry_ptr = self.builder.gep(adj_list, [write_offset_64]) + write_entry_typed = self.builder.bitcast(write_entry_ptr, self.node_type.as_pointer().as_pointer()) + self.builder.store(adj_node, write_entry_typed) + self.builder.branch(advance_write_block) + + self.builder.position_at_end(advance_write_block) + next_write_idx = self.builder.add(write_idx_val, ir.Constant(self.int_type, 1)) + self.builder.store(next_write_idx, write_index) + self.builder.branch(next_read_block) + + self.builder.position_at_end(skip_vertex_block) + self.builder.branch(next_read_block) + + self.builder.position_at_end(next_read_block) + next_i = self.builder.add(i_val, ir.Constant(self.int_type, 1)) + self.builder.store(next_i, i) + self.builder.branch(compact_loop_block) + + self.builder.position_at_end(update_count_block) + final_write_idx = self.builder.load(write_index) + self.builder.store(final_write_idx, adj_count_ptr) + self.builder.branch(skip_adj_block) + + self.builder.position_at_end(skip_adj_block) + + def _string_contains_substring(self, haystack, haystack_len, needle, needle_len): + + too_long = self.builder.icmp_signed('>', needle_len, haystack_len) + + false_block = self.builder.block.parent.append_basic_block(name="substr_false") + search_block = self.builder.block.parent.append_basic_block(name="substr_search") + true_block = self.builder.block.parent.append_basic_block(name="substr_true") + + self.builder.cbranch(too_long, false_block, search_block) + + self.builder.position_at_end(search_block) + max_start = self.builder.sub(haystack_len, needle_len) + max_start = self.builder.add(max_start, ir.Constant(self.int_type, 1)) + + i = self.builder.alloca(self.int_type, name="search_i") + self.builder.store(ir.Constant(self.int_type, 0), i) + + outer_loop_block = self.builder.block.parent.append_basic_block(name="outer_search_loop") + inner_loop_block = self.builder.block.parent.append_basic_block(name="inner_search_loop") + check_char_block = 
self.builder.block.parent.append_basic_block(name="check_char") + match_found_block = self.builder.block.parent.append_basic_block(name="match_found") + no_match_block = self.builder.block.parent.append_basic_block(name="no_match") + continue_outer_block = self.builder.block.parent.append_basic_block(name="continue_outer") + + self.builder.branch(outer_loop_block) + + self.builder.position_at_end(outer_loop_block) + i_val = self.builder.load(i) + outer_condition = self.builder.icmp_signed('<', i_val, max_start) + self.builder.cbranch(outer_condition, inner_loop_block, false_block) + + self.builder.position_at_end(inner_loop_block) + j = self.builder.alloca(self.int_type, name="search_j") + self.builder.store(ir.Constant(self.int_type, 0), j) + self.builder.branch(check_char_block) + + self.builder.position_at_end(check_char_block) + j_val = self.builder.load(j) + inner_condition = self.builder.icmp_signed('<', j_val, needle_len) + self.builder.cbranch(inner_condition, no_match_block, match_found_block) + + self.builder.position_at_end(no_match_block) + haystack_idx = self.builder.add(i_val, j_val) + haystack_char_ptr = self.builder.gep(haystack, [haystack_idx]) + needle_char_ptr = self.builder.gep(needle, [j_val]) + + haystack_char = self.builder.load(haystack_char_ptr) + needle_char = self.builder.load(needle_char_ptr) + + chars_match = self.builder.icmp_signed('==', haystack_char, needle_char) + + char_match_block = self.builder.block.parent.append_basic_block(name="char_match") + self.builder.cbranch(chars_match, char_match_block, continue_outer_block) + + self.builder.position_at_end(char_match_block) + next_j = self.builder.add(j_val, ir.Constant(self.int_type, 1)) + self.builder.store(next_j, j) + self.builder.branch(check_char_block) + + self.builder.position_at_end(match_found_block) + self.builder.branch(true_block) + + self.builder.position_at_end(continue_outer_block) + next_i = self.builder.add(i_val, ir.Constant(self.int_type, 1)) + 
self.builder.store(next_i, i) + self.builder.branch(outer_loop_block) + + self.builder.position_at_end(true_block) + result_true = ir.Constant(self.bool_type, 1) + merge_block = self.builder.block.parent.append_basic_block(name="merge") + self.builder.branch(merge_block) + + self.builder.position_at_end(false_block) + result_false = ir.Constant(self.bool_type, 0) + self.builder.branch(merge_block) + + self.builder.position_at_end(merge_block) + phi = self.builder.phi(self.bool_type, name="substr_result") + phi.add_incoming(result_true, true_block) + phi.add_incoming(result_false, false_block) + + return phi + + def _create_graph_cleanup(self): + + cleanup_type = ir.FunctionType(self.void_type, [self.graph_type.as_pointer()]) + self.graph_cleanup = ir.Function(self.module, cleanup_type, name="graph_cleanup") + + block = self.graph_cleanup.append_basic_block(name="entry") + self.builder = ir.IRBuilder(block) + + graph_ptr = self.graph_cleanup.args[0] + + graph_void = self.builder.bitcast(graph_ptr, self.void_ptr) + self.builder.call(self.free_func, [graph_void]) + + self.builder.ret_void() + + def compile_to_machine_code(self): + + target = llvm.Target.from_default_triple() + target_machine = target.create_target_machine() + + mod = llvm.parse_assembly(str(self.module)) + mod.verify() + + ee = llvm.create_mcjit_compiler(mod, target_machine) + ee.finalize_object() + + functions = {} + function_names = [ + 'graph_init', 'add_vertex', 'add_edge', 'is_adjacent', + 'remove_vertex', 'remove_edge', 'graph_cleanup' + ] + + for name in function_names: + func_ptr = ee.get_function_address(name) + functions[name] = func_ptr + + return functions, ee diff --git a/pydatastructs/graphs/_extensions.py b/pydatastructs/graphs/_extensions.py index f550d5254..677bc68cd 100644 --- a/pydatastructs/graphs/_extensions.py +++ b/pydatastructs/graphs/_extensions.py @@ -18,6 +18,19 @@ include_dir = os.path.abspath(os.path.join(project, 'utils', '_backend', 'cpp')) -extensions = 
[Extension(graph, sources=graph_sources,include_dirs=[include_dir], language="c++", extra_compile_args=["-std=c++17"]), - Extension(algorithms, sources=algorithms_sources,include_dirs=[include_dir], language="c++", extra_compile_args=["-std=c++17"]), - ] +extensions = [ + Extension( + graph, + sources=graph_sources, + include_dirs=[include_dir], + language="c++", + extra_compile_args=["-std=c++17", "-mmacosx-version-min=10.13"], + ), + Extension( + algorithms, + sources=algorithms_sources, + include_dirs=[include_dir], + language="c++", + extra_compile_args=["-std=c++17", "-mmacosx-version-min=10.13"], + ), +] diff --git a/pydatastructs/graphs/adjacency_list.py b/pydatastructs/graphs/adjacency_list.py index bd901b380..a8da3c959 100644 --- a/pydatastructs/graphs/adjacency_list.py +++ b/pydatastructs/graphs/adjacency_list.py @@ -7,6 +7,8 @@ 'AdjacencyList' ] +_llvm_backend = None + class AdjacencyList(Graph): """ Adjacency list implementation of graphs. @@ -27,11 +29,37 @@ def __new__(cls, *vertices, **kwargs): obj.edge_weights = {} obj._impl = 'adjacency_list' return obj - else: + elif backend == Backend.CPP: graph = _graph.AdjacencyListGraph() for vertice in vertices: graph.add_vertex(vertice) return graph + elif backend == Backend.LLVM: + def initialize_llvm_graph_backend(): + global _llvm_backend + if _llvm_backend is not None: + return _llvm_backend + + from pydatastructs.graphs._backend.cpp.llvm_adjacency_list import LLVMAdjacencyListGraph # Import your LLVM IR class + + llvm_graph = LLVMAdjacencyListGraph() + + functions, execution_engine = llvm_graph.compile_to_machine_code() + + _graph.initialize_llvm_backend(functions, id(execution_engine)) + + _llvm_backend = { + 'llvm_graph': llvm_graph, + 'functions': functions, + 'execution_engine': execution_engine + } + + return _llvm_backend + initialize_llvm_graph_backend() + graph = _graph.AdjacencyListGraphLLVM() + for vertex in vertices: + graph.add_vertex(vertex) + return graph @classmethod def methods(self): 
diff --git a/pydatastructs/graphs/tests/test_adjacency_list.py b/pydatastructs/graphs/tests/test_adjacency_list.py index 3a9cdb14f..2282eccfb 100644 --- a/pydatastructs/graphs/tests/test_adjacency_list.py +++ b/pydatastructs/graphs/tests/test_adjacency_list.py @@ -81,3 +81,15 @@ def test_adjacency_list(): assert g2.is_adjacent('v_4', 'v') is False g2.remove_vertex('v') assert raises(ValueError, lambda: g2.add_edge('v_4', 'v')) + + g3 = Graph('a','b',implementation = 'adjacency_list', backend = Backend.LLVM) + g3.add_edge('a', 'b',10) + assert g3.is_adjacent('a','b') is True + g3.add_vertex('c') + g3.add_edge('a','c') + assert g3.is_adjacent('a','c') is True + assert g3.is_adjacent('b','c') is False + g3.remove_edge('a','b') + assert g3.is_adjacent('a','b') is False + g3.remove_vertex('a') + assert g3.is_adjacent('a','c') is False diff --git a/pydatastructs/linear_data_structures/_backend/cpp/_algorithms.py b/pydatastructs/linear_data_structures/_backend/cpp/_algorithms.py new file mode 100644 index 000000000..e69de29bb diff --git a/pydatastructs/linear_data_structures/_backend/cpp/_arrays.py b/pydatastructs/linear_data_structures/_backend/cpp/_arrays.py new file mode 100644 index 000000000..e69de29bb diff --git a/pydatastructs/linear_data_structures/_backend/cpp/algorithms/llvm_algorithms.py b/pydatastructs/linear_data_structures/_backend/cpp/algorithms/llvm_algorithms.py index 20c4c4ce6..9280a1f33 100644 --- a/pydatastructs/linear_data_structures/_backend/cpp/algorithms/llvm_algorithms.py +++ b/pydatastructs/linear_data_structures/_backend/cpp/algorithms/llvm_algorithms.py @@ -13,7 +13,6 @@ _fn_ptr_cache = {} def _cleanup(): - """Clean up LLVM resources on exit.""" global _engines, _target_machine, _fn_ptr_cache _engines.clear() _target_machine = None @@ -27,9 +26,8 @@ def _ensure_target_machine(): return try: - binding.initialize() - binding.initialize_native_target() - binding.initialize_native_asmprinter() + binding.initialize_all_targets() + 
binding.initialize_all_asmprinters() target = binding.Target.from_default_triple() _target_machine = target.create_target_machine( @@ -40,7 +38,6 @@ def _ensure_target_machine(): raise RuntimeError(f"Failed to initialize LLVM target machine: {e}") def get_bubble_sort_ptr(dtype: str) -> int: - """Get function pointer for bubble sort with specified dtype.""" dtype = dtype.lower().strip() if dtype not in _SUPPORTED: raise ValueError(f"Unsupported dtype '{dtype}'. Supported: {list(_SUPPORTED)}") @@ -148,32 +145,6 @@ def _materialize(dtype: str) -> int: mod = binding.parse_assembly(llvm_ir) mod.verify() - pmb = binding.PassManagerBuilder() - pmb.opt_level = 3 - pmb.loop_vectorize = True - pmb.slp_vectorize = True - - fpm = binding.create_function_pass_manager(mod) - pm = binding.create_module_pass_manager() - - pm.add_basic_alias_analysis_pass() - pm.add_type_based_alias_analysis_pass() - pm.add_instruction_combining_pass() - pm.add_gvn_pass() - pm.add_cfg_simplification_pass() - pm.add_loop_unroll_pass() - pm.add_loop_unswitch_pass() - - pmb.populate(fpm) - pmb.populate(pm) - - fpm.initialize() - for func in mod.functions: - fpm.run(func) - fpm.finalize() - - pm.run(mod) - engine = binding.create_mcjit_compiler(mod, _target_machine) engine.finalize_object() engine.run_static_constructors() diff --git a/pydatastructs/utils/_extensions.py b/pydatastructs/utils/_extensions.py index bdf351b2a..26966bbaf 100644 --- a/pydatastructs/utils/_extensions.py +++ b/pydatastructs/utils/_extensions.py @@ -36,11 +36,11 @@ Extension( nodes, sources=nodes_sources, - extra_compile_args=["-std=c++17"] + extra_compile_args=["-std=c++17", "-mmacosx-version-min=10.13"], ), Extension( graph_utils, sources=graph_utils_sources, - extra_compile_args=["-std=c++17"] - ) + extra_compile_args=["-std=c++17", "-mmacosx-version-min=10.13"], + ), ] From 49d7f9e9aac39deef7b6dd7b4e1a6fc4d3691d00 Mon Sep 17 00:00:00 2001 From: Prerak Singh Date: Thu, 25 Sep 2025 13:50:46 +0530 Subject: [PATCH 03/19] bug 
fix --- pydatastructs/graphs/_extensions.py | 9 +++++++-- pydatastructs/utils/_extensions.py | 4 ++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/pydatastructs/graphs/_extensions.py b/pydatastructs/graphs/_extensions.py index 677bc68cd..b0caeb9c7 100644 --- a/pydatastructs/graphs/_extensions.py +++ b/pydatastructs/graphs/_extensions.py @@ -1,5 +1,6 @@ from setuptools import Extension import os +import sys project = 'pydatastructs' @@ -18,19 +19,23 @@ include_dir = os.path.abspath(os.path.join(project, 'utils', '_backend', 'cpp')) +extra_compile_args = ["-std=c++17"] +if sys.platform == "darwin": + extra_compile_args.append("-mmacosx-version-min=10.13") + extensions = [ Extension( graph, sources=graph_sources, include_dirs=[include_dir], language="c++", - extra_compile_args=["-std=c++17", "-mmacosx-version-min=10.13"], + extra_compile_args=extra_compile_args, ), Extension( algorithms, sources=algorithms_sources, include_dirs=[include_dir], language="c++", - extra_compile_args=["-std=c++17", "-mmacosx-version-min=10.13"], + extra_compile_args=extra_compile_args, ), ] diff --git a/pydatastructs/utils/_extensions.py b/pydatastructs/utils/_extensions.py index 26966bbaf..64d6250ec 100644 --- a/pydatastructs/utils/_extensions.py +++ b/pydatastructs/utils/_extensions.py @@ -36,11 +36,11 @@ Extension( nodes, sources=nodes_sources, - extra_compile_args=["-std=c++17", "-mmacosx-version-min=10.13"], + extra_compile_args=["-std=c++17"], ), Extension( graph_utils, sources=graph_utils_sources, - extra_compile_args=["-std=c++17", "-mmacosx-version-min=10.13"], + extra_compile_args=["-std=c++17"], ), ] From dde575efd439f03a6981246a05d0ae3b64b3e680 Mon Sep 17 00:00:00 2001 From: Prerak Singh Date: Thu, 25 Sep 2025 23:20:25 +0530 Subject: [PATCH 04/19] bug fix --- .../_backend/cpp/llvm_adjacency_list.py | 62 ++++++++++--------- pydatastructs/graphs/adjacency_list.py | 2 +- .../cpp/algorithms/llvm_algorithms.py | 34 +++------- 3 files changed, 42 insertions(+), 56 
deletions(-) diff --git a/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py b/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py index ced108cfe..e1ad286a7 100644 --- a/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py +++ b/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py @@ -528,10 +528,12 @@ def _add_to_adjacency_list(self, src_node_ptr, tgt_node_ptr): self.builder.mul(current_capacity, ir.Constant(self.int_type, 2)) ) - ptr_size = ir.Constant(self.int64_type, 8) - new_adj_size_64 = self.builder.mul(self.builder.zext(new_capacity, self.int64_type), ptr_size) - new_adj_mem = self.builder.call(self.malloc_func, [new_adj_size_64]) - new_adj_array = self.builder.bitcast(new_adj_mem, self.void_ptr) + node_ptr_type = self.node_type.as_pointer() + ptr_size_bytes = ir.Constant(self.int64_type, 8) + new_size_bytes = self.builder.mul(self.builder.zext(new_capacity, self.int64_type), ptr_size_bytes) + + new_array_mem = self.builder.call(self.malloc_func, [new_size_bytes]) + new_array = self.builder.bitcast(new_array_mem, node_ptr_type.as_pointer()) copy_adj_block = self.builder.block.parent.append_basic_block(name="copy_adj") no_copy_adj_block = self.builder.block.parent.append_basic_block(name="no_copy_adj") @@ -540,26 +542,31 @@ def _add_to_adjacency_list(self, src_node_ptr, tgt_node_ptr): self.builder.cbranch(has_existing_adj, copy_adj_block, no_copy_adj_block) self.builder.position_at_end(copy_adj_block) - old_adj_array = self.builder.load(adj_list_ptr) - old_adj_size_64 = self.builder.mul(self.builder.zext(current_count, self.int64_type), ptr_size) - self.builder.call(self.memcpy_func, [new_adj_array, old_adj_array, old_adj_size_64]) + old_adj_array_void = self.builder.load(adj_list_ptr) + old_size_bytes = self.builder.mul(self.builder.zext(current_count, self.int64_type), ptr_size_bytes) - self.builder.call(self.free_func, [old_adj_array]) + new_array_void = self.builder.bitcast(new_array, self.void_ptr) + 
self.builder.call(self.memcpy_func, [new_array_void, old_adj_array_void, old_size_bytes]) + + self.builder.call(self.free_func, [old_adj_array_void]) self.builder.branch(no_copy_adj_block) self.builder.position_at_end(no_copy_adj_block) - self.builder.store(new_adj_array, adj_list_ptr) + new_array_void = self.builder.bitcast(new_array, self.void_ptr) + self.builder.store(new_array_void, adj_list_ptr) self.builder.store(new_capacity, adj_cap_ptr) self.builder.branch(add_adj_block) self.builder.position_at_end(add_adj_block) - adj_array = self.builder.load(adj_list_ptr) + + adj_array_void = self.builder.load(adj_list_ptr) + adj_array_typed = self.builder.bitcast(adj_array_void, node_ptr_type.as_pointer()) + current_count_final = self.builder.load(adj_count_ptr) - offset_64 = self.builder.mul(self.builder.zext(current_count_final, self.int64_type), ptr_size) - tgt_slot_ptr = self.builder.gep(adj_array, [offset_64]) - tgt_slot_typed = self.builder.bitcast(tgt_slot_ptr, self.node_type.as_pointer().as_pointer()) - self.builder.store(tgt_node_ptr, tgt_slot_typed) + tgt_slot_ptr = self.builder.gep(adj_array_typed, [current_count_final]) + + self.builder.store(tgt_node_ptr, tgt_slot_ptr) new_adj_count = self.builder.add(current_count_final, ir.Constant(self.int_type, 1)) self.builder.store(new_adj_count, adj_count_ptr) @@ -922,13 +929,15 @@ def _remove_from_all_adjacency_lists(self, graph_ptr, vertex_name, vertex_name_l self.builder.position_at_end(done_adj_cleanup) def _remove_from_adjacency_list(self, src_node_ptr, tgt_node_ptr): - adj_list_ptr = self.builder.gep(src_node_ptr, [ir.Constant(self.int_type, 0), ir.Constant(self.int_type, 3)]) adj_count_ptr = self.builder.gep(src_node_ptr, [ir.Constant(self.int_type, 0), ir.Constant(self.int_type, 4)]) - adj_list = self.builder.load(adj_list_ptr) + adj_list_void = self.builder.load(adj_list_ptr) adj_count = self.builder.load(adj_count_ptr) + node_ptr_type = self.node_type.as_pointer() + adj_list_typed = 
self.builder.bitcast(adj_list_void, node_ptr_type.as_pointer()) + i = self.builder.alloca(self.int_type, name="adj_i") self.builder.store(ir.Constant(self.int_type, 0), i) @@ -947,11 +956,9 @@ def _remove_from_adjacency_list(self, src_node_ptr, tgt_node_ptr): self.builder.cbranch(loop_condition, adj_check_block, adj_done_block) self.builder.position_at_end(adj_check_block) - ptr_size = ir.Constant(self.int64_type, 8) - offset_64 = self.builder.mul(self.builder.zext(i_val, self.int64_type), ptr_size) - adj_entry_ptr = self.builder.gep(adj_list, [offset_64]) - adj_entry_typed = self.builder.bitcast(adj_entry_ptr, self.node_type.as_pointer().as_pointer()) - adj_node = self.builder.load(adj_entry_typed) + + adj_entry_ptr = self.builder.gep(adj_list_typed, [i_val]) + adj_node = self.builder.load(adj_entry_ptr) is_target = self.builder.icmp_signed('==', adj_node, tgt_node_ptr) self.builder.cbranch(is_target, adj_found_block, adj_next_block) @@ -972,17 +979,12 @@ def _remove_from_adjacency_list(self, src_node_ptr, tgt_node_ptr): self.builder.cbranch(shift_condition, do_adj_shift_block, finish_adj_shift_block) self.builder.position_at_end(do_adj_shift_block) - src_offset_64 = self.builder.mul(self.builder.zext(next_shift_idx, self.int64_type), ptr_size) - dst_offset_64 = self.builder.mul(self.builder.zext(shift_i_val, self.int64_type), ptr_size) - - src_adj_ptr = self.builder.gep(adj_list, [src_offset_64]) - dst_adj_ptr = self.builder.gep(adj_list, [dst_offset_64]) - src_adj_typed = self.builder.bitcast(src_adj_ptr, self.node_type.as_pointer().as_pointer()) - dst_adj_typed = self.builder.bitcast(dst_adj_ptr, self.node_type.as_pointer().as_pointer()) + src_adj_ptr = self.builder.gep(adj_list_typed, [next_shift_idx]) + dst_adj_ptr = self.builder.gep(adj_list_typed, [shift_i_val]) - node_to_shift = self.builder.load(src_adj_typed) - self.builder.store(node_to_shift, dst_adj_typed) + node_to_shift = self.builder.load(src_adj_ptr) + self.builder.store(node_to_shift, 
dst_adj_ptr) self.builder.store(next_shift_idx, shift_i) self.builder.branch(adj_shift_block) diff --git a/pydatastructs/graphs/adjacency_list.py b/pydatastructs/graphs/adjacency_list.py index a8da3c959..f0cdcfaed 100644 --- a/pydatastructs/graphs/adjacency_list.py +++ b/pydatastructs/graphs/adjacency_list.py @@ -40,7 +40,7 @@ def initialize_llvm_graph_backend(): if _llvm_backend is not None: return _llvm_backend - from pydatastructs.graphs._backend.cpp.llvm_adjacency_list import LLVMAdjacencyListGraph # Import your LLVM IR class + from pydatastructs.graphs._backend.cpp.llvm_adjacency_list import LLVMAdjacencyListGraph llvm_graph = LLVMAdjacencyListGraph() diff --git a/pydatastructs/linear_data_structures/_backend/cpp/algorithms/llvm_algorithms.py b/pydatastructs/linear_data_structures/_backend/cpp/algorithms/llvm_algorithms.py index 0115d9a18..397fec152 100644 --- a/pydatastructs/linear_data_structures/_backend/cpp/algorithms/llvm_algorithms.py +++ b/pydatastructs/linear_data_structures/_backend/cpp/algorithms/llvm_algorithms.py @@ -145,31 +145,15 @@ def _materialize(dtype: str) -> int: mod = binding.parse_assembly(llvm_ir) mod.verify() - pmb = binding.PassManagerBuilder() - pmb.opt_level = 3 - pmb.loop_vectorize = True - pmb.slp_vectorize = True - - fpm = binding.create_function_pass_manager(mod) - pm = binding.create_module_pass_manager() - - pm.add_basic_alias_analysis_pass() - pm.add_type_based_alias_analysis_pass() - pm.add_instruction_combining_pass() - pm.add_gvn_pass() - pm.add_cfg_simplification_pass() - pm.add_loop_unroll_pass() - pm.add_loop_unswitch_pass() - - pmb.populate(fpm) - pmb.populate(pm) - - fpm.initialize() - for func in mod.functions: - fpm.run(func) - fpm.finalize() - - pm.run(mod) + try: + pm = binding.ModulePassManager() + pm.add_instruction_combining_pass() + pm.add_reassociate_pass() + pm.add_gvn_pass() + pm.add_cfg_simplification_pass() + pm.run(mod) + except AttributeError: + pass engine = binding.create_mcjit_compiler(mod, 
_target_machine) engine.finalize_object() From 5cbabd2028814729a53f4003dfe0a74089303155 Mon Sep 17 00:00:00 2001 From: Prerak Singh Date: Thu, 25 Sep 2025 23:43:56 +0530 Subject: [PATCH 05/19] bug fix --- .../_backend/cpp/llvm_adjacency_list.py | 36 +++++++++---------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py b/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py index e1ad286a7..ff5293e23 100644 --- a/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py +++ b/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py @@ -223,16 +223,20 @@ def _create_hash_functions(self): len_match = self.builder.icmp_signed('==', entry_key_len, key_len) - next_entry_ptr = self.builder.gep(entry_ptr, [ir.Constant(self.int_type, 0), ir.Constant(self.int_type, 3)]) - next_entry = self.builder.load(next_entry_ptr) - self.builder.store(next_entry, current) - content_check_block = self.hash_lookup.append_basic_block(name="content_check") - self.builder.cbranch(len_match, content_check_block, loop_block) + next_block = self.hash_lookup.append_basic_block(name="next_entry") + + self.builder.cbranch(len_match, content_check_block, next_block) self.builder.position_at_end(content_check_block) strings_match = self._compare_strings(entry_key, key, key_len) - self.builder.cbranch(strings_match, found_block, loop_block) + self.builder.cbranch(strings_match, found_block, next_block) + + self.builder.position_at_end(next_block) + next_entry_ptr = self.builder.gep(entry_ptr, [ir.Constant(self.int_type, 0), ir.Constant(self.int_type, 3)]) + next_entry = self.builder.load(next_entry_ptr) + self.builder.store(next_entry, current) + self.builder.branch(loop_block) self.builder.position_at_end(found_block) value_ptr = self.builder.gep(entry_ptr, [ir.Constant(self.int_type, 0), ir.Constant(self.int_type, 2)]) @@ -529,10 +533,8 @@ def _add_to_adjacency_list(self, src_node_ptr, tgt_node_ptr): ) node_ptr_type = 
self.node_type.as_pointer() - ptr_size_bytes = ir.Constant(self.int64_type, 8) - new_size_bytes = self.builder.mul(self.builder.zext(new_capacity, self.int64_type), ptr_size_bytes) - - new_array_mem = self.builder.call(self.malloc_func, [new_size_bytes]) + new_size_elements = self.builder.zext(new_capacity, self.int64_type) + new_array_mem = self.builder.call(self.malloc_func, [self.builder.mul(new_size_elements, ir.Constant(self.int64_type, node_ptr_type.get_abi_size(llvm.create_target_data(""))))]) new_array = self.builder.bitcast(new_array_mem, node_ptr_type.as_pointer()) copy_adj_block = self.builder.block.parent.append_basic_block(name="copy_adj") @@ -543,7 +545,8 @@ def _add_to_adjacency_list(self, src_node_ptr, tgt_node_ptr): self.builder.position_at_end(copy_adj_block) old_adj_array_void = self.builder.load(adj_list_ptr) - old_size_bytes = self.builder.mul(self.builder.zext(current_count, self.int64_type), ptr_size_bytes) + old_size_elements = self.builder.zext(current_count, self.int64_type) + old_size_bytes = self.builder.mul(old_size_elements, ir.Constant(self.int64_type, node_ptr_type.get_abi_size(llvm.create_target_data("")))) new_array_void = self.builder.bitcast(new_array, self.void_ptr) self.builder.call(self.memcpy_func, [new_array_void, old_adj_array_void, old_size_bytes]) @@ -558,12 +561,10 @@ def _add_to_adjacency_list(self, src_node_ptr, tgt_node_ptr): self.builder.branch(add_adj_block) self.builder.position_at_end(add_adj_block) - adj_array_void = self.builder.load(adj_list_ptr) adj_array_typed = self.builder.bitcast(adj_array_void, node_ptr_type.as_pointer()) current_count_final = self.builder.load(adj_count_ptr) - tgt_slot_ptr = self.builder.gep(adj_array_typed, [current_count_final]) self.builder.store(tgt_node_ptr, tgt_slot_ptr) @@ -604,7 +605,6 @@ def _create_hash_insert(self): self.builder.ret(ir.Constant(self.int_type, 0)) def _create_is_adjacent(self): - is_adj_type = ir.FunctionType(self.bool_type, [self.graph_type.as_pointer(), 
self.char_ptr, self.int_type, self.char_ptr, self.int_type]) self.is_adjacent = ir.Function(self.module, is_adj_type, name="is_adjacent") @@ -663,11 +663,9 @@ def _create_is_adjacent(self): self.builder.cbranch(loop_condition, adj_check_block, false_block) self.builder.position_at_end(adj_check_block) - ptr_size = ir.Constant(self.int64_type, 8) - offset_64 = self.builder.mul(self.builder.zext(i_val, self.int64_type), ptr_size) - adj_entry_ptr = self.builder.gep(adj_list, [offset_64]) - adj_entry_typed = self.builder.bitcast(adj_entry_ptr, self.node_type.as_pointer().as_pointer()) - adj_node = self.builder.load(adj_entry_typed) + adj_list_typed = self.builder.bitcast(adj_list, self.node_type.as_pointer().as_pointer()) + adj_entry_ptr = self.builder.gep(adj_list_typed, [i_val]) + adj_node = self.builder.load(adj_entry_ptr) nodes_match = self.builder.icmp_signed('==', adj_node, node2_ptr) self.builder.cbranch(nodes_match, true_block, adj_next_block) From f02888283e25ba19d593b5f6721541976c39dfe4 Mon Sep 17 00:00:00 2001 From: Prerak Singh Date: Fri, 26 Sep 2025 00:06:20 +0530 Subject: [PATCH 06/19] bug fix --- .../_backend/cpp/llvm_adjacency_list.py | 36 +++++++++++-------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py b/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py index ff5293e23..f2163ab2b 100644 --- a/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py +++ b/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py @@ -62,6 +62,14 @@ def _create_structures(self): self.int_type ]) + self.target_machine = None + + def _get_target_data(self): + if self.target_machine is None: + target = llvm.Target.from_default_triple() + self.target_machine = target.create_target_machine() + return self.target_machine.target_data + def _create_function_declarations(self): malloc_type = ir.FunctionType(self.void_ptr, [self.int64_type]) @@ -255,7 +263,7 @@ def _create_node_functions(self): 
name_ptr, name_len, node_id = self.create_node.args - node_size = ir.Constant(self.int64_type, self.node_type.get_abi_size(llvm.create_target_data(""))) + node_size = ir.Constant(self.int64_type, self.node_type.get_abi_size(self._get_target_data())) node_mem = self.builder.call(self.malloc_func, [node_size]) node_ptr = self.builder.bitcast(node_mem, self.node_type.as_pointer()) @@ -285,7 +293,7 @@ def _create_graph_init(self): block = self.graph_init.append_basic_block(name="entry") self.builder = ir.IRBuilder(block) - graph_size = ir.Constant(self.int64_type, self.graph_type.get_abi_size(llvm.create_target_data(""))) + graph_size = ir.Constant(self.int64_type, self.graph_type.get_abi_size(self._get_target_data())) graph_mem = self.builder.call(self.malloc_func, [graph_size]) graph_ptr = self.builder.bitcast(graph_mem, self.graph_type.as_pointer()) @@ -491,7 +499,7 @@ def _create_add_edge(self): self.builder.position_at_end(create_edge_block) - edge_size = ir.Constant(self.int64_type, self.edge_type.get_abi_size(llvm.create_target_data(""))) + edge_size = ir.Constant(self.int64_type, self.edge_type.get_abi_size(self._get_target_data())) edge_mem = self.builder.call(self.malloc_func, [edge_size]) edge_ptr = self.builder.bitcast(edge_mem, self.edge_type.as_pointer()) @@ -533,8 +541,9 @@ def _add_to_adjacency_list(self, src_node_ptr, tgt_node_ptr): ) node_ptr_type = self.node_type.as_pointer() + ptr_size = ir.Constant(self.int64_type, node_ptr_type.get_abi_size(self._get_target_data())) new_size_elements = self.builder.zext(new_capacity, self.int64_type) - new_array_mem = self.builder.call(self.malloc_func, [self.builder.mul(new_size_elements, ir.Constant(self.int64_type, node_ptr_type.get_abi_size(llvm.create_target_data(""))))]) + new_array_mem = self.builder.call(self.malloc_func, [self.builder.mul(new_size_elements, ptr_size)]) new_array = self.builder.bitcast(new_array_mem, node_ptr_type.as_pointer()) copy_adj_block = 
self.builder.block.parent.append_basic_block(name="copy_adj") @@ -546,7 +555,7 @@ def _add_to_adjacency_list(self, src_node_ptr, tgt_node_ptr): self.builder.position_at_end(copy_adj_block) old_adj_array_void = self.builder.load(adj_list_ptr) old_size_elements = self.builder.zext(current_count, self.int64_type) - old_size_bytes = self.builder.mul(old_size_elements, ir.Constant(self.int64_type, node_ptr_type.get_abi_size(llvm.create_target_data("")))) + old_size_bytes = self.builder.mul(old_size_elements, ptr_size) new_array_void = self.builder.bitcast(new_array, self.void_ptr) self.builder.call(self.memcpy_func, [new_array_void, old_adj_array_void, old_size_bytes]) @@ -582,7 +591,7 @@ def _create_hash_insert(self): table_ptr, key, key_len, value = self.hash_insert.args - entry_size = ir.Constant(self.int64_type, self.hash_entry_type.get_abi_size(llvm.create_target_data(""))) + entry_size = ir.Constant(self.int64_type, self.hash_entry_type.get_abi_size(self._get_target_data())) entry_mem = self.builder.call(self.malloc_func, [entry_size]) entry_ptr = self.builder.bitcast(entry_mem, self.hash_entry_type.as_pointer()) @@ -1225,11 +1234,9 @@ def _remove_vertex_from_node_adjacency(self, node_ptr, vertex_name, vertex_name_ self.builder.cbranch(loop_condition, check_vertex_block, update_count_block) self.builder.position_at_end(check_vertex_block) - ptr_size = ir.Constant(self.int64_type, 8) - read_offset_64 = self.builder.mul(self.builder.zext(i_val, self.int64_type), ptr_size) - read_entry_ptr = self.builder.gep(adj_list, [read_offset_64]) - read_entry_typed = self.builder.bitcast(read_entry_ptr, self.node_type.as_pointer().as_pointer()) - adj_node = self.builder.load(read_entry_typed) + adj_list_typed = self.builder.bitcast(adj_list, self.node_type.as_pointer().as_pointer()) + read_entry_ptr = self.builder.gep(adj_list_typed, [i_val]) + adj_node = self.builder.load(read_entry_ptr) adj_node_name_ptr = self.builder.gep(adj_node, [ir.Constant(self.int_type, 0), 
ir.Constant(self.int_type, 1)]) adj_node_name = self.builder.load(adj_node_name_ptr) @@ -1258,10 +1265,8 @@ def _remove_vertex_from_node_adjacency(self, node_ptr, vertex_name, vertex_name_ self.builder.cbranch(indices_different, do_copy_block, advance_write_block) self.builder.position_at_end(do_copy_block) - write_offset_64 = self.builder.mul(self.builder.zext(write_idx_val, self.int64_type), ptr_size) - write_entry_ptr = self.builder.gep(adj_list, [write_offset_64]) - write_entry_typed = self.builder.bitcast(write_entry_ptr, self.node_type.as_pointer().as_pointer()) - self.builder.store(adj_node, write_entry_typed) + write_entry_ptr = self.builder.gep(adj_list_typed, [write_idx_val]) + self.builder.store(adj_node, write_entry_ptr) self.builder.branch(advance_write_block) self.builder.position_at_end(advance_write_block) @@ -1386,6 +1391,7 @@ def compile_to_machine_code(self): target = llvm.Target.from_default_triple() target_machine = target.create_target_machine() + self.target_machine = target_machine mod = llvm.parse_assembly(str(self.module)) mod.verify() From bba424d6d4f77dba54fe1aff71c841bb05699839 Mon Sep 17 00:00:00 2001 From: Prerak Singh Date: Fri, 26 Sep 2025 01:40:16 +0530 Subject: [PATCH 07/19] bug fix --- pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py b/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py index f2163ab2b..b1dedca04 100644 --- a/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py +++ b/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py @@ -379,7 +379,7 @@ def _create_add_vertex(self): self.builder.mul(current_capacity, ir.Constant(self.int_type, 2)) ) - ptr_size = ir.Constant(self.int64_type, 8) + ptr_size = ir.Constant(self.int64_type, self.node_type.as_pointer().get_abi_size(self._get_target_data())) new_size_64 = self.builder.mul(self.builder.zext(new_capacity, self.int64_type), 
ptr_size) new_array_mem = self.builder.call(self.malloc_func, [new_size_64]) new_array = self.builder.bitcast(new_array_mem, self.node_type.as_pointer().as_pointer()) From 5c7b6367a9f56042a0ceea5f1f494f59b4f29ac1 Mon Sep 17 00:00:00 2001 From: Prerak Singh Date: Fri, 26 Sep 2025 03:17:48 +0530 Subject: [PATCH 08/19] bug fix --- .../graphs/_backend/cpp/llvm_adjacency_list.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py b/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py index b1dedca04..eca2517c6 100644 --- a/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py +++ b/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py @@ -564,8 +564,8 @@ def _add_to_adjacency_list(self, src_node_ptr, tgt_node_ptr): self.builder.branch(no_copy_adj_block) self.builder.position_at_end(no_copy_adj_block) - new_array_void = self.builder.bitcast(new_array, self.void_ptr) - self.builder.store(new_array_void, adj_list_ptr) + new_array_as_void = self.builder.bitcast(new_array, self.void_ptr) + self.builder.store(new_array_as_void, adj_list_ptr) self.builder.store(new_capacity, adj_cap_ptr) self.builder.branch(add_adj_block) @@ -647,13 +647,15 @@ def _create_is_adjacent(self): adj_list_ptr = self.builder.gep(node1_ptr, [ir.Constant(self.int_type, 0), ir.Constant(self.int_type, 3)]) adj_count_ptr = self.builder.gep(node1_ptr, [ir.Constant(self.int_type, 0), ir.Constant(self.int_type, 4)]) - adj_list = self.builder.load(adj_list_ptr) + adj_list_void = self.builder.load(adj_list_ptr) adj_count = self.builder.load(adj_count_ptr) - adj_exists = self.builder.icmp_signed('!=', adj_list, ir.Constant(self.void_ptr, None)) + adj_exists = self.builder.icmp_signed('!=', adj_list_void, ir.Constant(self.void_ptr, None)) + count_positive = self.builder.icmp_signed('>', adj_count, ir.Constant(self.int_type, 0)) + should_search = self.builder.and_(adj_exists, count_positive) search_adj_block = 
self.is_adjacent.append_basic_block(name="search_adjacency") - self.builder.cbranch(adj_exists, search_adj_block, false_block) + self.builder.cbranch(should_search, search_adj_block, false_block) self.builder.position_at_end(search_adj_block) i = self.builder.alloca(self.int_type, name="adj_search_i") @@ -672,7 +674,7 @@ def _create_is_adjacent(self): self.builder.cbranch(loop_condition, adj_check_block, false_block) self.builder.position_at_end(adj_check_block) - adj_list_typed = self.builder.bitcast(adj_list, self.node_type.as_pointer().as_pointer()) + adj_list_typed = self.builder.bitcast(adj_list_void, self.node_type.as_pointer().as_pointer()) adj_entry_ptr = self.builder.gep(adj_list_typed, [i_val]) adj_node = self.builder.load(adj_entry_ptr) From 1b80b9f8b73a04b263752afaf64946e30e629a24 Mon Sep 17 00:00:00 2001 From: Prerak Singh Date: Fri, 26 Sep 2025 03:33:44 +0530 Subject: [PATCH 09/19] bug fix --- .../_backend/cpp/llvm_adjacency_list.py | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py b/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py index eca2517c6..2536bd6ca 100644 --- a/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py +++ b/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py @@ -527,16 +527,19 @@ def _add_to_adjacency_list(self, src_node_ptr, tgt_node_ptr): current_count = self.builder.load(adj_count_ptr) current_capacity = self.builder.load(adj_cap_ptr) + needs_allocation = self.builder.icmp_signed('==', current_capacity, ir.Constant(self.int_type, 0)) + needs_resize = self.builder.icmp_signed('>=', current_count, current_capacity) + needs_realloc = self.builder.or_(needs_allocation, needs_resize) + resize_adj_block = self.builder.block.parent.append_basic_block(name="resize_adj") add_adj_block = self.builder.block.parent.append_basic_block(name="add_adj") - needs_resize = self.builder.icmp_signed('>=', current_count, current_capacity) 
- self.builder.cbranch(needs_resize, resize_adj_block, add_adj_block) + self.builder.cbranch(needs_realloc, resize_adj_block, add_adj_block) self.builder.position_at_end(resize_adj_block) new_capacity = self.builder.select( - self.builder.icmp_signed('==', current_capacity, ir.Constant(self.int_type, 0)), - ir.Constant(self.int_type, 4), + needs_allocation, + ir.Constant(self.int_type, 1), self.builder.mul(current_capacity, ir.Constant(self.int_type, 2)) ) @@ -547,10 +550,10 @@ def _add_to_adjacency_list(self, src_node_ptr, tgt_node_ptr): new_array = self.builder.bitcast(new_array_mem, node_ptr_type.as_pointer()) copy_adj_block = self.builder.block.parent.append_basic_block(name="copy_adj") - no_copy_adj_block = self.builder.block.parent.append_basic_block(name="no_copy_adj") + store_new_block = self.builder.block.parent.append_basic_block(name="store_new") - has_existing_adj = self.builder.icmp_signed('>', current_count, ir.Constant(self.int_type, 0)) - self.builder.cbranch(has_existing_adj, copy_adj_block, no_copy_adj_block) + has_existing_data = self.builder.icmp_signed('>', current_count, ir.Constant(self.int_type, 0)) + self.builder.cbranch(has_existing_data, copy_adj_block, store_new_block) self.builder.position_at_end(copy_adj_block) old_adj_array_void = self.builder.load(adj_list_ptr) @@ -559,11 +562,10 @@ def _add_to_adjacency_list(self, src_node_ptr, tgt_node_ptr): new_array_void = self.builder.bitcast(new_array, self.void_ptr) self.builder.call(self.memcpy_func, [new_array_void, old_adj_array_void, old_size_bytes]) - self.builder.call(self.free_func, [old_adj_array_void]) - self.builder.branch(no_copy_adj_block) + self.builder.branch(store_new_block) - self.builder.position_at_end(no_copy_adj_block) + self.builder.position_at_end(store_new_block) new_array_as_void = self.builder.bitcast(new_array, self.void_ptr) self.builder.store(new_array_as_void, adj_list_ptr) self.builder.store(new_capacity, adj_cap_ptr) @@ -575,7 +577,6 @@ def 
_add_to_adjacency_list(self, src_node_ptr, tgt_node_ptr): current_count_final = self.builder.load(adj_count_ptr) tgt_slot_ptr = self.builder.gep(adj_array_typed, [current_count_final]) - self.builder.store(tgt_node_ptr, tgt_slot_ptr) new_adj_count = self.builder.add(current_count_final, ir.Constant(self.int_type, 1)) From e7086e46be3a45621e6c792ffcb97ff24b917d12 Mon Sep 17 00:00:00 2001 From: Prerak Singh Date: Fri, 26 Sep 2025 03:58:45 +0530 Subject: [PATCH 10/19] bug fix --- .../_backend/cpp/llvm_adjacency_list.py | 85 ++++++++++--------- 1 file changed, 43 insertions(+), 42 deletions(-) diff --git a/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py b/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py index 2536bd6ca..89781e0cf 100644 --- a/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py +++ b/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py @@ -379,7 +379,7 @@ def _create_add_vertex(self): self.builder.mul(current_capacity, ir.Constant(self.int_type, 2)) ) - ptr_size = ir.Constant(self.int64_type, self.node_type.as_pointer().get_abi_size(self._get_target_data())) + ptr_size = ir.Constant(self.int64_type, 8) new_size_64 = self.builder.mul(self.builder.zext(new_capacity, self.int64_type), ptr_size) new_array_mem = self.builder.call(self.malloc_func, [new_size_64]) new_array = self.builder.bitcast(new_array_mem, self.node_type.as_pointer().as_pointer()) @@ -527,60 +527,56 @@ def _add_to_adjacency_list(self, src_node_ptr, tgt_node_ptr): current_count = self.builder.load(adj_count_ptr) current_capacity = self.builder.load(adj_cap_ptr) - needs_allocation = self.builder.icmp_signed('==', current_capacity, ir.Constant(self.int_type, 0)) needs_resize = self.builder.icmp_signed('>=', current_count, current_capacity) - needs_realloc = self.builder.or_(needs_allocation, needs_resize) - + resize_adj_block = self.builder.block.parent.append_basic_block(name="resize_adj") add_adj_block = self.builder.block.parent.append_basic_block(name="add_adj") 
- self.builder.cbranch(needs_realloc, resize_adj_block, add_adj_block) + self.builder.cbranch(needs_resize, resize_adj_block, add_adj_block) self.builder.position_at_end(resize_adj_block) new_capacity = self.builder.select( - needs_allocation, + self.builder.icmp_signed('==', current_capacity, ir.Constant(self.int_type, 0)), ir.Constant(self.int_type, 1), self.builder.mul(current_capacity, ir.Constant(self.int_type, 2)) ) - node_ptr_type = self.node_type.as_pointer() - ptr_size = ir.Constant(self.int64_type, node_ptr_type.get_abi_size(self._get_target_data())) - new_size_elements = self.builder.zext(new_capacity, self.int64_type) - new_array_mem = self.builder.call(self.malloc_func, [self.builder.mul(new_size_elements, ptr_size)]) - new_array = self.builder.bitcast(new_array_mem, node_ptr_type.as_pointer()) - - copy_adj_block = self.builder.block.parent.append_basic_block(name="copy_adj") - store_new_block = self.builder.block.parent.append_basic_block(name="store_new") - - has_existing_data = self.builder.icmp_signed('>', current_count, ir.Constant(self.int_type, 0)) - self.builder.cbranch(has_existing_data, copy_adj_block, store_new_block) - - self.builder.position_at_end(copy_adj_block) - old_adj_array_void = self.builder.load(adj_list_ptr) - old_size_elements = self.builder.zext(current_count, self.int64_type) - old_size_bytes = self.builder.mul(old_size_elements, ptr_size) - - new_array_void = self.builder.bitcast(new_array, self.void_ptr) - self.builder.call(self.memcpy_func, [new_array_void, old_adj_array_void, old_size_bytes]) - self.builder.call(self.free_func, [old_adj_array_void]) - self.builder.branch(store_new_block) - - self.builder.position_at_end(store_new_block) - new_array_as_void = self.builder.bitcast(new_array, self.void_ptr) - self.builder.store(new_array_as_void, adj_list_ptr) + ptr_size = ir.Constant(self.int64_type, 8) + new_size_bytes = self.builder.mul(self.builder.zext(new_capacity, self.int64_type), ptr_size) + new_array_mem = 
self.builder.call(self.malloc_func, [new_size_bytes]) + + old_adj_list = self.builder.load(adj_list_ptr) + copy_needed = self.builder.icmp_signed('>', current_count, ir.Constant(self.int_type, 0)) + + copy_block = self.builder.block.parent.append_basic_block(name="copy_existing") + store_block = self.builder.block.parent.append_basic_block(name="store_new_array") + + self.builder.cbranch(copy_needed, copy_block, store_block) + + self.builder.position_at_end(copy_block) + old_size_bytes = self.builder.mul(self.builder.zext(current_count, self.int64_type), ptr_size) + self.builder.call(self.memcpy_func, [new_array_mem, old_adj_list, old_size_bytes]) + self.builder.call(self.free_func, [old_adj_list]) + self.builder.branch(store_block) + + self.builder.position_at_end(store_block) + self.builder.store(new_array_mem, adj_list_ptr) self.builder.store(new_capacity, adj_cap_ptr) self.builder.branch(add_adj_block) self.builder.position_at_end(add_adj_block) - adj_array_void = self.builder.load(adj_list_ptr) - adj_array_typed = self.builder.bitcast(adj_array_void, node_ptr_type.as_pointer()) + adj_array = self.builder.load(adj_list_ptr) current_count_final = self.builder.load(adj_count_ptr) - tgt_slot_ptr = self.builder.gep(adj_array_typed, [current_count_final]) - self.builder.store(tgt_node_ptr, tgt_slot_ptr) + offset_bytes = self.builder.mul(self.builder.zext(current_count_final, self.int64_type), ptr_size) + target_addr = self.builder.gep(adj_array, [offset_bytes]) - new_adj_count = self.builder.add(current_count_final, ir.Constant(self.int_type, 1)) - self.builder.store(new_adj_count, adj_count_ptr) + node_as_int = self.builder.ptrtoint(tgt_node_ptr, self.int64_type) + node_bytes = self.builder.bitcast(target_addr, self.int64_type.as_pointer()) + self.builder.store(node_as_int, node_bytes) + + new_count = self.builder.add(current_count_final, ir.Constant(self.int_type, 1)) + self.builder.store(new_count, adj_count_ptr) def _create_hash_insert(self): insert_func_type = 
ir.FunctionType(self.int_type, @@ -675,9 +671,15 @@ def _create_is_adjacent(self): self.builder.cbranch(loop_condition, adj_check_block, false_block) self.builder.position_at_end(adj_check_block) - adj_list_typed = self.builder.bitcast(adj_list_void, self.node_type.as_pointer().as_pointer()) - adj_entry_ptr = self.builder.gep(adj_list_typed, [i_val]) - adj_node = self.builder.load(adj_entry_ptr) + adj_array = self.builder.load(adj_list_ptr) + + ptr_size = ir.Constant(self.int64_type, 8) + offset_bytes = self.builder.mul(self.builder.zext(i_val, self.int64_type), ptr_size) + entry_addr = self.builder.gep(adj_array, [offset_bytes]) + + entry_as_int_ptr = self.builder.bitcast(entry_addr, self.int64_type.as_pointer()) + entry_as_int = self.builder.load(entry_as_int_ptr) + adj_node = self.builder.inttoptr(entry_as_int, self.node_type.as_pointer()) nodes_match = self.builder.icmp_signed('==', adj_node, node2_ptr) self.builder.cbranch(nodes_match, true_block, adj_next_block) @@ -945,8 +947,7 @@ def _remove_from_adjacency_list(self, src_node_ptr, tgt_node_ptr): adj_list_void = self.builder.load(adj_list_ptr) adj_count = self.builder.load(adj_count_ptr) - node_ptr_type = self.node_type.as_pointer() - adj_list_typed = self.builder.bitcast(adj_list_void, node_ptr_type.as_pointer()) + adj_list_typed = self.builder.bitcast(adj_list_void, self.node_type.as_pointer().as_pointer()) i = self.builder.alloca(self.int_type, name="adj_i") self.builder.store(ir.Constant(self.int_type, 0), i) From 23a138a6d37e5f5f92b8a8782c20f353deccf4af Mon Sep 17 00:00:00 2001 From: Prerak Singh Date: Fri, 26 Sep 2025 15:23:49 +0530 Subject: [PATCH 11/19] bug fix --- .../_backend/cpp/llvm_adjacency_list.py | 36 +++++++------------ .../graphs/tests/test_adjacency_list.py | 2 +- 2 files changed, 14 insertions(+), 24 deletions(-) diff --git a/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py b/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py index 89781e0cf..a994df94c 100644 --- 
a/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py +++ b/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py @@ -528,7 +528,7 @@ def _add_to_adjacency_list(self, src_node_ptr, tgt_node_ptr): current_capacity = self.builder.load(adj_cap_ptr) needs_resize = self.builder.icmp_signed('>=', current_count, current_capacity) - + resize_adj_block = self.builder.block.parent.append_basic_block(name="resize_adj") add_adj_block = self.builder.block.parent.append_basic_block(name="add_adj") @@ -547,18 +547,18 @@ def _add_to_adjacency_list(self, src_node_ptr, tgt_node_ptr): old_adj_list = self.builder.load(adj_list_ptr) copy_needed = self.builder.icmp_signed('>', current_count, ir.Constant(self.int_type, 0)) - + copy_block = self.builder.block.parent.append_basic_block(name="copy_existing") store_block = self.builder.block.parent.append_basic_block(name="store_new_array") - + self.builder.cbranch(copy_needed, copy_block, store_block) - + self.builder.position_at_end(copy_block) old_size_bytes = self.builder.mul(self.builder.zext(current_count, self.int64_type), ptr_size) self.builder.call(self.memcpy_func, [new_array_mem, old_adj_list, old_size_bytes]) self.builder.call(self.free_func, [old_adj_list]) self.builder.branch(store_block) - + self.builder.position_at_end(store_block) self.builder.store(new_array_mem, adj_list_ptr) self.builder.store(new_capacity, adj_cap_ptr) @@ -566,14 +566,11 @@ def _add_to_adjacency_list(self, src_node_ptr, tgt_node_ptr): self.builder.position_at_end(add_adj_block) adj_array = self.builder.load(adj_list_ptr) + adj_array_typed = self.builder.bitcast(adj_array, self.node_type.as_pointer().as_pointer()) current_count_final = self.builder.load(adj_count_ptr) - offset_bytes = self.builder.mul(self.builder.zext(current_count_final, self.int64_type), ptr_size) - target_addr = self.builder.gep(adj_array, [offset_bytes]) - - node_as_int = self.builder.ptrtoint(tgt_node_ptr, self.int64_type) - node_bytes = self.builder.bitcast(target_addr, 
self.int64_type.as_pointer()) - self.builder.store(node_as_int, node_bytes) + target_addr = self.builder.gep(adj_array_typed, [current_count_final]) + self.builder.store(tgt_node_ptr, target_addr) new_count = self.builder.add(current_count_final, ir.Constant(self.int_type, 1)) self.builder.store(new_count, adj_count_ptr) @@ -655,6 +652,8 @@ def _create_is_adjacent(self): self.builder.cbranch(should_search, search_adj_block, false_block) self.builder.position_at_end(search_adj_block) + adj_array_typed = self.builder.bitcast(adj_list_void, self.node_type.as_pointer().as_pointer()) + i = self.builder.alloca(self.int_type, name="adj_search_i") self.builder.store(ir.Constant(self.int_type, 0), i) @@ -671,15 +670,8 @@ def _create_is_adjacent(self): self.builder.cbranch(loop_condition, adj_check_block, false_block) self.builder.position_at_end(adj_check_block) - adj_array = self.builder.load(adj_list_ptr) - - ptr_size = ir.Constant(self.int64_type, 8) - offset_bytes = self.builder.mul(self.builder.zext(i_val, self.int64_type), ptr_size) - entry_addr = self.builder.gep(adj_array, [offset_bytes]) - - entry_as_int_ptr = self.builder.bitcast(entry_addr, self.int64_type.as_pointer()) - entry_as_int = self.builder.load(entry_as_int_ptr) - adj_node = self.builder.inttoptr(entry_as_int, self.node_type.as_pointer()) + entry_ptr = self.builder.gep(adj_array_typed, [i_val]) + adj_node = self.builder.load(entry_ptr) nodes_match = self.builder.icmp_signed('==', adj_node, node2_ptr) self.builder.cbranch(nodes_match, true_block, adj_next_block) @@ -967,7 +959,6 @@ def _remove_from_adjacency_list(self, src_node_ptr, tgt_node_ptr): self.builder.cbranch(loop_condition, adj_check_block, adj_done_block) self.builder.position_at_end(adj_check_block) - adj_entry_ptr = self.builder.gep(adj_list_typed, [i_val]) adj_node = self.builder.load(adj_entry_ptr) @@ -990,7 +981,6 @@ def _remove_from_adjacency_list(self, src_node_ptr, tgt_node_ptr): self.builder.cbranch(shift_condition, 
do_adj_shift_block, finish_adj_shift_block) self.builder.position_at_end(do_adj_shift_block) - src_adj_ptr = self.builder.gep(adj_list_typed, [next_shift_idx]) dst_adj_ptr = self.builder.gep(adj_list_typed, [shift_i_val]) @@ -1217,6 +1207,7 @@ def _remove_vertex_from_node_adjacency(self, node_ptr, vertex_name, vertex_name_ self.builder.cbranch(adj_exists, process_adj_block, skip_adj_block) self.builder.position_at_end(process_adj_block) + adj_list_typed = self.builder.bitcast(adj_list, self.node_type.as_pointer().as_pointer()) write_index = self.builder.alloca(self.int_type, name="write_idx") self.builder.store(ir.Constant(self.int_type, 0), write_index) @@ -1238,7 +1229,6 @@ def _remove_vertex_from_node_adjacency(self, node_ptr, vertex_name, vertex_name_ self.builder.cbranch(loop_condition, check_vertex_block, update_count_block) self.builder.position_at_end(check_vertex_block) - adj_list_typed = self.builder.bitcast(adj_list, self.node_type.as_pointer().as_pointer()) read_entry_ptr = self.builder.gep(adj_list_typed, [i_val]) adj_node = self.builder.load(read_entry_ptr) diff --git a/pydatastructs/graphs/tests/test_adjacency_list.py b/pydatastructs/graphs/tests/test_adjacency_list.py index 2282eccfb..24ef96321 100644 --- a/pydatastructs/graphs/tests/test_adjacency_list.py +++ b/pydatastructs/graphs/tests/test_adjacency_list.py @@ -83,7 +83,7 @@ def test_adjacency_list(): assert raises(ValueError, lambda: g2.add_edge('v_4', 'v')) g3 = Graph('a','b',implementation = 'adjacency_list', backend = Backend.LLVM) - g3.add_edge('a', 'b',10) + g3.add_edge('a', 'b') assert g3.is_adjacent('a','b') is True g3.add_vertex('c') g3.add_edge('a','c') From 0280cb84574349c9fbe949deb3ec0193f5e3bf57 Mon Sep 17 00:00:00 2001 From: Prerak Singh Date: Fri, 26 Sep 2025 16:14:13 +0530 Subject: [PATCH 12/19] bug fix --- .../_backend/cpp/llvm_adjacency_list.py | 39 +++++++++++-------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git 
a/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py b/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py index a994df94c..ae5d9dfda 100644 --- a/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py +++ b/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py @@ -23,6 +23,9 @@ def __init__(self): self.char_ptr = self.int8_type.as_pointer() self.void_ptr = self.int8_type.as_pointer() + target = llvm.Target.from_default_triple() + self.target_machine = target.create_target_machine() + self._create_structures() self._create_function_declarations() @@ -62,14 +65,16 @@ def _create_structures(self): self.int_type ]) - self.target_machine = None - def _get_target_data(self): - if self.target_machine is None: - target = llvm.Target.from_default_triple() - self.target_machine = target.create_target_machine() return self.target_machine.target_data + def _get_struct_size(self, struct_type): + return struct_type.get_abi_size(self._get_target_data()) + + def _get_pointer_size(self): + import struct + return struct.calcsize("P") + def _create_function_declarations(self): malloc_type = ir.FunctionType(self.void_ptr, [self.int64_type]) @@ -263,7 +268,7 @@ def _create_node_functions(self): name_ptr, name_len, node_id = self.create_node.args - node_size = ir.Constant(self.int64_type, self.node_type.get_abi_size(self._get_target_data())) + node_size = ir.Constant(self.int64_type, self._get_struct_size(self.node_type)) node_mem = self.builder.call(self.malloc_func, [node_size]) node_ptr = self.builder.bitcast(node_mem, self.node_type.as_pointer()) @@ -293,7 +298,7 @@ def _create_graph_init(self): block = self.graph_init.append_basic_block(name="entry") self.builder = ir.IRBuilder(block) - graph_size = ir.Constant(self.int64_type, self.graph_type.get_abi_size(self._get_target_data())) + graph_size = ir.Constant(self.int64_type, self._get_struct_size(self.graph_type)) graph_mem = self.builder.call(self.malloc_func, [graph_size]) graph_ptr = 
self.builder.bitcast(graph_mem, self.graph_type.as_pointer()) @@ -379,7 +384,10 @@ def _create_add_vertex(self): self.builder.mul(current_capacity, ir.Constant(self.int_type, 2)) ) - ptr_size = ir.Constant(self.int64_type, 8) + target_data = self._get_target_data() + ptr_type = self.node_type.as_pointer() + ptr_size = ir.Constant(self.int64_type, self._get_pointer_size()) + new_size_64 = self.builder.mul(self.builder.zext(new_capacity, self.int64_type), ptr_size) new_array_mem = self.builder.call(self.malloc_func, [new_size_64]) new_array = self.builder.bitcast(new_array_mem, self.node_type.as_pointer().as_pointer()) @@ -499,7 +507,7 @@ def _create_add_edge(self): self.builder.position_at_end(create_edge_block) - edge_size = ir.Constant(self.int64_type, self.edge_type.get_abi_size(self._get_target_data())) + edge_size = ir.Constant(self.int64_type, self._get_struct_size(self.edge_type)) edge_mem = self.builder.call(self.malloc_func, [edge_size]) edge_ptr = self.builder.bitcast(edge_mem, self.edge_type.as_pointer()) @@ -541,7 +549,10 @@ def _add_to_adjacency_list(self, src_node_ptr, tgt_node_ptr): self.builder.mul(current_capacity, ir.Constant(self.int_type, 2)) ) - ptr_size = ir.Constant(self.int64_type, 8) + target_data = self._get_target_data() + ptr_type = self.node_type.as_pointer() + ptr_size = ir.Constant(self.int64_type, self._get_pointer_size()) + new_size_bytes = self.builder.mul(self.builder.zext(new_capacity, self.int64_type), ptr_size) new_array_mem = self.builder.call(self.malloc_func, [new_size_bytes]) @@ -585,7 +596,7 @@ def _create_hash_insert(self): table_ptr, key, key_len, value = self.hash_insert.args - entry_size = ir.Constant(self.int64_type, self.hash_entry_type.get_abi_size(self._get_target_data())) + entry_size = ir.Constant(self.int64_type, self._get_struct_size(self.hash_entry_type)) entry_mem = self.builder.call(self.malloc_func, [entry_size]) entry_ptr = self.builder.bitcast(entry_mem, self.hash_entry_type.as_pointer()) @@ -1383,14 
+1394,10 @@ def _create_graph_cleanup(self): def compile_to_machine_code(self): - target = llvm.Target.from_default_triple() - target_machine = target.create_target_machine() - self.target_machine = target_machine - mod = llvm.parse_assembly(str(self.module)) mod.verify() - ee = llvm.create_mcjit_compiler(mod, target_machine) + ee = llvm.create_mcjit_compiler(mod, self.target_machine) ee.finalize_object() functions = {} From 296eb49f1b2fd8718cefe1b694308e7b474893fc Mon Sep 17 00:00:00 2001 From: Prerak Singh Date: Fri, 26 Sep 2025 20:31:56 +0530 Subject: [PATCH 13/19] bug fix --- pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py b/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py index ae5d9dfda..15ecb9aff 100644 --- a/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py +++ b/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py @@ -25,6 +25,7 @@ def __init__(self): target = llvm.Target.from_default_triple() self.target_machine = target.create_target_machine() + self.target_data = self.target_machine.target_data self._create_structures() @@ -41,20 +42,20 @@ def _create_structures(self): self.void_ptr, self.int_type, self.int_type - ]) + ], packed=True) self.edge_type = ir.LiteralStructType([ self.node_type.as_pointer(), self.node_type.as_pointer(), self.double_type - ]) + ], packed=True) self.hash_entry_type = ir.LiteralStructType([ self.char_ptr, self.int_type, self.void_ptr, self.void_ptr - ]) + ], packed=True) self.graph_type = ir.LiteralStructType([ self.node_type.as_pointer().as_pointer(), @@ -63,7 +64,7 @@ def _create_structures(self): self.void_ptr, self.void_ptr, self.int_type - ]) + ], packed=True) def _get_target_data(self): return self.target_machine.target_data From ef39b97672109da11ebccd5b5c3e6135fd92e626 Mon Sep 17 00:00:00 2001 From: Prerak Singh Date: Fri, 26 Sep 2025 21:04:19 +0530 Subject: [PATCH 
14/19] bug fix --- .../graphs/_backend/cpp/AdjacencyListLLVM.hpp | 28 +++++++++------ .../_backend/cpp/llvm_adjacency_list.py | 34 ++++++++++++++++--- pydatastructs/utils/_extensions.py | 4 +-- 3 files changed, 50 insertions(+), 16 deletions(-) diff --git a/pydatastructs/graphs/_backend/cpp/AdjacencyListLLVM.hpp b/pydatastructs/graphs/_backend/cpp/AdjacencyListLLVM.hpp index 4a5f85dc5..1ec184daf 100644 --- a/pydatastructs/graphs/_backend/cpp/AdjacencyListLLVM.hpp +++ b/pydatastructs/graphs/_backend/cpp/AdjacencyListLLVM.hpp @@ -69,26 +69,34 @@ static PyObject* initialize_llvm_backend(PyObject* self, PyObject* args) { return nullptr; } - llvm_graph_init = (GraphInitFunc)PyLong_AsVoidPtr(init_ptr); - llvm_add_vertex = (AddVertexFunc)PyLong_AsVoidPtr(add_vertex_ptr); - llvm_add_edge = (AddEdgeFunc)PyLong_AsVoidPtr(add_edge_ptr); - llvm_is_adjacent = (IsAdjacentFunc)PyLong_AsVoidPtr(is_adjacent_ptr); - llvm_remove_vertex = (RemoveVertexFunc)PyLong_AsVoidPtr(remove_vertex_ptr); - llvm_remove_edge = (RemoveEdgeFunc)PyLong_AsVoidPtr(remove_edge_ptr); - llvm_graph_cleanup = (GraphCleanupFunc)PyLong_AsVoidPtr(cleanup_ptr); + void* raw_init = PyLong_AsVoidPtr(init_ptr); + void* raw_add_vertex = PyLong_AsVoidPtr(add_vertex_ptr); + void* raw_add_edge = PyLong_AsVoidPtr(add_edge_ptr); + void* raw_is_adjacent = PyLong_AsVoidPtr(is_adjacent_ptr); + void* raw_remove_vertex = PyLong_AsVoidPtr(remove_vertex_ptr); + void* raw_remove_edge = PyLong_AsVoidPtr(remove_edge_ptr); + void* raw_cleanup = PyLong_AsVoidPtr(cleanup_ptr); if (PyErr_Occurred()) { PyErr_SetString(PyExc_ValueError, "Failed to convert function pointers"); return nullptr; } - if (!llvm_graph_init || !llvm_add_vertex || !llvm_add_edge || - !llvm_is_adjacent || !llvm_remove_vertex || !llvm_remove_edge || - !llvm_graph_cleanup) { + if (!raw_init || !raw_add_vertex || !raw_add_edge || + !raw_is_adjacent || !raw_remove_vertex || !raw_remove_edge || + !raw_cleanup) { PyErr_SetString(PyExc_ValueError, "One or more function 
pointers are null"); return nullptr; } + llvm_graph_init = (GraphInitFunc)raw_init; + llvm_add_vertex = (AddVertexFunc)raw_add_vertex; + llvm_add_edge = (AddEdgeFunc)raw_add_edge; + llvm_is_adjacent = (IsAdjacentFunc)raw_is_adjacent; + llvm_remove_vertex = (RemoveVertexFunc)raw_remove_vertex; + llvm_remove_edge = (RemoveEdgeFunc)raw_remove_edge; + llvm_graph_cleanup = (GraphCleanupFunc)raw_cleanup; + llvm_backend_initialized = true; Py_RETURN_NONE; diff --git a/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py b/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py index 15ecb9aff..b0cd65a9b 100644 --- a/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py +++ b/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py @@ -23,9 +23,12 @@ def __init__(self): self.char_ptr = self.int8_type.as_pointer() self.void_ptr = self.int8_type.as_pointer() + triple = self._get_target_triple() target = llvm.Target.from_default_triple() self.target_machine = target.create_target_machine() self.target_data = self.target_machine.target_data + self.module.triple = triple + self.module.data_layout = str(self.target_data) self._create_structures() @@ -33,6 +36,29 @@ def __init__(self): self._create_graph_functions() + def _get_target_triple(self): + import platform + system = platform.system().lower() + machine = platform.machine().lower() + + if system == "darwin": # macOS + if machine in ["arm64", "aarch64"]: + return "arm64-apple-darwin" + else: # x86_64 + return "x86_64-apple-darwin" + elif system == "linux": + if machine in ["arm64", "aarch64"]: + return "aarch64-unknown-linux-gnu" + else: # x86_64 + return "x86_64-unknown-linux-gnu" + elif system == "windows": + if machine in ["arm64", "aarch64"]: + return "aarch64-pc-windows-msvc" + else: + return "x86_64-pc-windows-msvc" + else: + return llvm.get_default_triple() + def _create_structures(self): self.node_type = ir.LiteralStructType([ @@ -42,20 +68,20 @@ def _create_structures(self): self.void_ptr, self.int_type, 
self.int_type - ], packed=True) + ]) self.edge_type = ir.LiteralStructType([ self.node_type.as_pointer(), self.node_type.as_pointer(), self.double_type - ], packed=True) + ]) self.hash_entry_type = ir.LiteralStructType([ self.char_ptr, self.int_type, self.void_ptr, self.void_ptr - ], packed=True) + ]) self.graph_type = ir.LiteralStructType([ self.node_type.as_pointer().as_pointer(), @@ -64,7 +90,7 @@ def _create_structures(self): self.void_ptr, self.void_ptr, self.int_type - ], packed=True) + ]) def _get_target_data(self): return self.target_machine.target_data diff --git a/pydatastructs/utils/_extensions.py b/pydatastructs/utils/_extensions.py index 64d6250ec..cbed818ca 100644 --- a/pydatastructs/utils/_extensions.py +++ b/pydatastructs/utils/_extensions.py @@ -36,11 +36,11 @@ Extension( nodes, sources=nodes_sources, - extra_compile_args=["-std=c++17"], + extra_compile_args=extra_compile_args, ), Extension( graph_utils, sources=graph_utils_sources, - extra_compile_args=["-std=c++17"], + extra_compile_args=extra_compile_args, ), ] From cdf747d5d434bd7ea14d7ef251ac793ad45ac876 Mon Sep 17 00:00:00 2001 From: Prerak Singh Date: Fri, 26 Sep 2025 21:20:18 +0530 Subject: [PATCH 15/19] bug fix --- pydatastructs/graphs/tests/test_adjacency_list.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydatastructs/graphs/tests/test_adjacency_list.py b/pydatastructs/graphs/tests/test_adjacency_list.py index 24ef96321..e8c92c9eb 100644 --- a/pydatastructs/graphs/tests/test_adjacency_list.py +++ b/pydatastructs/graphs/tests/test_adjacency_list.py @@ -84,7 +84,7 @@ def test_adjacency_list(): g3 = Graph('a','b',implementation = 'adjacency_list', backend = Backend.LLVM) g3.add_edge('a', 'b') - assert g3.is_adjacent('a','b') is True + #assert g3.is_adjacent('a','b') is True g3.add_vertex('c') g3.add_edge('a','c') assert g3.is_adjacent('a','c') is True From e44eae768eedee7d297326ea05db69c18e7ec27f Mon Sep 17 00:00:00 2001 From: Prerak Singh Date: Fri, 26 Sep 2025 
22:02:10 +0530 Subject: [PATCH 16/19] bug fix --- .../_backend/cpp/llvm_adjacency_list.py | 83 ++++++++----------- 1 file changed, 36 insertions(+), 47 deletions(-) diff --git a/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py b/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py index b0cd65a9b..75d96203b 100644 --- a/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py +++ b/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py @@ -130,8 +130,6 @@ def _create_graph_functions(self): self._create_graph_cleanup() def _compare_strings(self, str1, str2, length): - """Compare two strings byte by byte""" - same_ptr = self.builder.icmp_signed('==', str1, str2) true_block = self.builder.block.parent.append_basic_block(name="strings_equal") @@ -172,17 +170,15 @@ def _compare_strings(self, str1, str2, length): self.builder.branch(loop_block) self.builder.position_at_end(true_block) - result_true = ir.Constant(self.bool_type, 1) self.builder.branch(merge_block) self.builder.position_at_end(false_block) - result_false = ir.Constant(self.bool_type, 0) self.builder.branch(merge_block) self.builder.position_at_end(merge_block) phi = self.builder.phi(self.bool_type, name="string_cmp_result") - phi.add_incoming(result_true, true_block) - phi.add_incoming(result_false, false_block) + phi.add_incoming(ir.Constant(self.bool_type, 1), true_block) + phi.add_incoming(ir.Constant(self.bool_type, 0), false_block) return phi @@ -650,8 +646,18 @@ def _create_is_adjacent(self): [self.graph_type.as_pointer(), self.char_ptr, self.int_type, self.char_ptr, self.int_type]) self.is_adjacent = ir.Function(self.module, is_adj_type, name="is_adjacent") - block = self.is_adjacent.append_basic_block(name="entry") - self.builder = ir.IRBuilder(block) + entry_block = self.is_adjacent.append_basic_block(name="entry") + node1_found_block = self.is_adjacent.append_basic_block(name="node1_found") + check_adjacency_block = self.is_adjacent.append_basic_block(name="check_adjacency") + 
search_adj_block = self.is_adjacent.append_basic_block(name="search_adjacency") + adj_loop_block = self.is_adjacent.append_basic_block(name="adj_search_loop") + adj_check_block = self.is_adjacent.append_basic_block(name="adj_check_node") + adj_next_block = self.is_adjacent.append_basic_block(name="adj_next") + adj_loop_end_block = self.is_adjacent.append_basic_block(name="adj_loop_end") + true_block = self.is_adjacent.append_basic_block(name="return_true") + false_block = self.is_adjacent.append_basic_block(name="return_false") + + self.builder.position_at_end(entry_block) graph_ptr, node1_name, node1_name_len, node2_name, node2_name_len = self.is_adjacent.args @@ -659,17 +665,12 @@ def _create_is_adjacent(self): node_map = self.builder.load(node_map_ptr) node1_void = self.builder.call(self.hash_lookup, [node_map, node1_name, node1_name_len]) - node1_found_block = self.is_adjacent.append_basic_block(name="node1_found") - false_block = self.is_adjacent.append_basic_block(name="return_false") - node1_exists = self.builder.icmp_signed('!=', node1_void, ir.Constant(self.void_ptr, None)) self.builder.cbranch(node1_exists, node1_found_block, false_block) self.builder.position_at_end(node1_found_block) node2_void = self.builder.call(self.hash_lookup, [node_map, node2_name, node2_name_len]) node2_exists = self.builder.icmp_signed('!=', node2_void, ir.Constant(self.void_ptr, None)) - - check_adjacency_block = self.is_adjacent.append_basic_block(name="check_adjacency") self.builder.cbranch(node2_exists, check_adjacency_block, false_block) self.builder.position_at_end(check_adjacency_block) @@ -686,26 +687,18 @@ def _create_is_adjacent(self): count_positive = self.builder.icmp_signed('>', adj_count, ir.Constant(self.int_type, 0)) should_search = self.builder.and_(adj_exists, count_positive) - search_adj_block = self.is_adjacent.append_basic_block(name="search_adjacency") self.builder.cbranch(should_search, search_adj_block, false_block) 
self.builder.position_at_end(search_adj_block) adj_array_typed = self.builder.bitcast(adj_list_void, self.node_type.as_pointer().as_pointer()) - i = self.builder.alloca(self.int_type, name="adj_search_i") self.builder.store(ir.Constant(self.int_type, 0), i) - - adj_loop_block = self.is_adjacent.append_basic_block(name="adj_search_loop") - adj_check_block = self.is_adjacent.append_basic_block(name="adj_check_node") - true_block = self.is_adjacent.append_basic_block(name="return_true") - adj_next_block = self.is_adjacent.append_basic_block(name="adj_next") - self.builder.branch(adj_loop_block) self.builder.position_at_end(adj_loop_block) i_val = self.builder.load(i) loop_condition = self.builder.icmp_signed('<', i_val, adj_count) - self.builder.cbranch(loop_condition, adj_check_block, false_block) + self.builder.cbranch(loop_condition, adj_check_block, adj_loop_end_block) self.builder.position_at_end(adj_check_block) entry_ptr = self.builder.gep(adj_array_typed, [i_val]) @@ -722,6 +715,9 @@ def _create_is_adjacent(self): self.builder.position_at_end(true_block) self.builder.ret(ir.Constant(self.bool_type, 1)) + self.builder.position_at_end(adj_loop_end_block) + self.builder.ret(ir.Constant(self.bool_type, 0)) + self.builder.position_at_end(false_block) self.builder.ret(ir.Constant(self.bool_type, 0)) @@ -1323,12 +1319,20 @@ def _remove_vertex_from_node_adjacency(self, node_ptr, vertex_name, vertex_name_ def _string_contains_substring(self, haystack, haystack_len, needle, needle_len): - too_long = self.builder.icmp_signed('>', needle_len, haystack_len) - + # Define all basic blocks upfront + entry_block = self.builder.block false_block = self.builder.block.parent.append_basic_block(name="substr_false") search_block = self.builder.block.parent.append_basic_block(name="substr_search") + outer_loop_block = self.builder.block.parent.append_basic_block(name="outer_search_loop") + inner_loop_start = self.builder.block.parent.append_basic_block(name="inner_loop_start") + 
check_char_block = self.builder.block.parent.append_basic_block(name="check_char") + char_match_block = self.builder.block.parent.append_basic_block(name="char_match") + continue_outer_block = self.builder.block.parent.append_basic_block(name="continue_outer") true_block = self.builder.block.parent.append_basic_block(name="substr_true") + merge_block = self.builder.block.parent.append_basic_block(name="merge") + self.builder.position_at_end(entry_block) + too_long = self.builder.icmp_signed('>', needle_len, haystack_len) self.builder.cbranch(too_long, false_block, search_block) self.builder.position_at_end(search_block) @@ -1337,22 +1341,14 @@ def _string_contains_substring(self, haystack, haystack_len, needle, needle_len) i = self.builder.alloca(self.int_type, name="search_i") self.builder.store(ir.Constant(self.int_type, 0), i) - - outer_loop_block = self.builder.block.parent.append_basic_block(name="outer_search_loop") - inner_loop_block = self.builder.block.parent.append_basic_block(name="inner_search_loop") - check_char_block = self.builder.block.parent.append_basic_block(name="check_char") - match_found_block = self.builder.block.parent.append_basic_block(name="match_found") - no_match_block = self.builder.block.parent.append_basic_block(name="no_match") - continue_outer_block = self.builder.block.parent.append_basic_block(name="continue_outer") - self.builder.branch(outer_loop_block) self.builder.position_at_end(outer_loop_block) i_val = self.builder.load(i) outer_condition = self.builder.icmp_signed('<', i_val, max_start) - self.builder.cbranch(outer_condition, inner_loop_block, false_block) + self.builder.cbranch(outer_condition, inner_loop_start, false_block) - self.builder.position_at_end(inner_loop_block) + self.builder.position_at_end(inner_loop_start) j = self.builder.alloca(self.int_type, name="search_j") self.builder.store(ir.Constant(self.int_type, 0), j) self.builder.branch(check_char_block) @@ -1360,19 +1356,18 @@ def 
_string_contains_substring(self, haystack, haystack_len, needle, needle_len) self.builder.position_at_end(check_char_block) j_val = self.builder.load(j) inner_condition = self.builder.icmp_signed('<', j_val, needle_len) - self.builder.cbranch(inner_condition, no_match_block, match_found_block) - self.builder.position_at_end(no_match_block) + match_or_mismatch = self.builder.block.parent.append_basic_block(name="match_or_mismatch") + self.builder.cbranch(inner_condition, match_or_mismatch, true_block) + + self.builder.position_at_end(match_or_mismatch) haystack_idx = self.builder.add(i_val, j_val) haystack_char_ptr = self.builder.gep(haystack, [haystack_idx]) needle_char_ptr = self.builder.gep(needle, [j_val]) - haystack_char = self.builder.load(haystack_char_ptr) needle_char = self.builder.load(needle_char_ptr) - chars_match = self.builder.icmp_signed('==', haystack_char, needle_char) - char_match_block = self.builder.block.parent.append_basic_block(name="char_match") self.builder.cbranch(chars_match, char_match_block, continue_outer_block) self.builder.position_at_end(char_match_block) @@ -1380,27 +1375,21 @@ def _string_contains_substring(self, haystack, haystack_len, needle, needle_len) self.builder.store(next_j, j) self.builder.branch(check_char_block) - self.builder.position_at_end(match_found_block) - self.builder.branch(true_block) - self.builder.position_at_end(continue_outer_block) next_i = self.builder.add(i_val, ir.Constant(self.int_type, 1)) self.builder.store(next_i, i) self.builder.branch(outer_loop_block) self.builder.position_at_end(true_block) - result_true = ir.Constant(self.bool_type, 1) - merge_block = self.builder.block.parent.append_basic_block(name="merge") self.builder.branch(merge_block) self.builder.position_at_end(false_block) - result_false = ir.Constant(self.bool_type, 0) self.builder.branch(merge_block) self.builder.position_at_end(merge_block) phi = self.builder.phi(self.bool_type, name="substr_result") - phi.add_incoming(result_true, 
true_block) - phi.add_incoming(result_false, false_block) + phi.add_incoming(ir.Constant(self.bool_type, 1), true_block) + phi.add_incoming(ir.Constant(self.bool_type, 0), false_block) return phi From 182df7508c6b0dab1b692be710ea3aeb8d708bd7 Mon Sep 17 00:00:00 2001 From: Prerak Singh Date: Fri, 26 Sep 2025 22:41:56 +0530 Subject: [PATCH 17/19] bug fix --- .../_backend/cpp/llvm_adjacency_list.py | 32 ++++++++----------- 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py b/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py index 75d96203b..06242af46 100644 --- a/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py +++ b/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py @@ -130,35 +130,27 @@ def _create_graph_functions(self): self._create_graph_cleanup() def _compare_strings(self, str1, str2, length): - same_ptr = self.builder.icmp_signed('==', str1, str2) - + entry_block = self.builder.block + loop_block = self.builder.block.parent.append_basic_block(name="str_cmp_loop") + check_block = self.builder.block.parent.append_basic_block(name="str_cmp_check") true_block = self.builder.block.parent.append_basic_block(name="strings_equal") false_block = self.builder.block.parent.append_basic_block(name="strings_not_equal") - compare_block = self.builder.block.parent.append_basic_block(name="compare_bytes") merge_block = self.builder.block.parent.append_basic_block(name="string_cmp_merge") - self.builder.cbranch(same_ptr, true_block, compare_block) - - self.builder.position_at_end(compare_block) i = self.builder.alloca(self.int_type, name="str_cmp_i") self.builder.store(ir.Constant(self.int_type, 0), i) - - loop_block = self.builder.block.parent.append_basic_block(name="str_cmp_loop") - check_block = self.builder.block.parent.append_basic_block(name="str_cmp_check") - self.builder.branch(loop_block) self.builder.position_at_end(loop_block) i_val = self.builder.load(i) - loop_condition = 
self.builder.icmp_signed('<', i_val, length) - self.builder.cbranch(loop_condition, check_block, true_block) + loop_cond = self.builder.icmp_signed('<', i_val, length) + self.builder.cbranch(loop_cond, check_block, true_block) self.builder.position_at_end(check_block) char1_ptr = self.builder.gep(str1, [i_val]) char2_ptr = self.builder.gep(str2, [i_val]) char1 = self.builder.load(char1_ptr) char2 = self.builder.load(char2_ptr) - chars_equal = self.builder.icmp_signed('==', char1, char2) next_char_block = self.builder.block.parent.append_basic_block(name="next_char") @@ -247,7 +239,7 @@ def _create_hash_functions(self): self.builder.position_at_end(loop_block) current_val = self.builder.load(current) - is_null = self.builder.icmp_signed('==', current_val, ir.Constant(self.void_ptr, None)) + is_null = self.builder.icmp_unsigned('==', current_val, ir.Constant(self.void_ptr, None)) self.builder.cbranch(is_null, not_found_block, check_block) self.builder.position_at_end(check_block) @@ -257,7 +249,7 @@ def _create_hash_functions(self): entry_key_len_ptr = self.builder.gep(entry_ptr, [ir.Constant(self.int_type, 0), ir.Constant(self.int_type, 1)]) entry_key_len = self.builder.load(entry_key_len_ptr) - len_match = self.builder.icmp_signed('==', entry_key_len, key_len) + len_match = self.builder.icmp_unsigned('==', entry_key_len, key_len) content_check_block = self.hash_lookup.append_basic_block(name="content_check") next_block = self.hash_lookup.append_basic_block(name="next_entry") @@ -298,8 +290,14 @@ def _create_node_functions(self): id_ptr = self.builder.gep(node_ptr, [ir.Constant(self.int_type, 0), ir.Constant(self.int_type, 0)]) self.builder.store(node_id, id_ptr) + name_buf_size = self.builder.zext(name_len, self.int64_type) + name_buf = self.builder.call(self.malloc_func, [name_buf_size]) + name_dest = self.builder.bitcast(name_buf, self.char_ptr) + name_len_64 = self.builder.zext(name_len, self.int64_type) + self.builder.call(self.memcpy_func, [name_dest, 
name_ptr, name_len_64]) + name_field_ptr = self.builder.gep(node_ptr, [ir.Constant(self.int_type, 0), ir.Constant(self.int_type, 1)]) - self.builder.store(name_ptr, name_field_ptr) + self.builder.store(name_dest, name_field_ptr) name_len_ptr = self.builder.gep(node_ptr, [ir.Constant(self.int_type, 0), ir.Constant(self.int_type, 2)]) self.builder.store(name_len, name_len_ptr) @@ -703,7 +701,6 @@ def _create_is_adjacent(self): self.builder.position_at_end(adj_check_block) entry_ptr = self.builder.gep(adj_array_typed, [i_val]) adj_node = self.builder.load(entry_ptr) - nodes_match = self.builder.icmp_signed('==', adj_node, node2_ptr) self.builder.cbranch(nodes_match, true_block, adj_next_block) @@ -1319,7 +1316,6 @@ def _remove_vertex_from_node_adjacency(self, node_ptr, vertex_name, vertex_name_ def _string_contains_substring(self, haystack, haystack_len, needle, needle_len): - # Define all basic blocks upfront entry_block = self.builder.block false_block = self.builder.block.parent.append_basic_block(name="substr_false") search_block = self.builder.block.parent.append_basic_block(name="substr_search") From 7590e688f9f08584ed8723be2b611cf3b184661e Mon Sep 17 00:00:00 2001 From: Prerak Singh Date: Sat, 27 Sep 2025 00:22:25 +0530 Subject: [PATCH 18/19] bug fix --- .../_backend/cpp/llvm_adjacency_list.py | 148 +++++++++++++----- 1 file changed, 109 insertions(+), 39 deletions(-) diff --git a/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py b/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py index 06242af46..47fbc8e88 100644 --- a/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py +++ b/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py @@ -554,13 +554,62 @@ def _add_to_adjacency_list(self, src_node_ptr, tgt_node_ptr): adj_cap_ptr = self.builder.gep(src_node_ptr, [ir.Constant(self.int_type, 0), ir.Constant(self.int_type, 5)]) current_count = self.builder.load(adj_count_ptr) - current_capacity = self.builder.load(adj_cap_ptr) + tgt_node_name_ptr = 
self.builder.gep(tgt_node_ptr, [ir.Constant(self.int_type, 0), ir.Constant(self.int_type, 1)]) + tgt_node_name = self.builder.load(tgt_node_name_ptr) + tgt_node_name_len_ptr = self.builder.gep(tgt_node_ptr, [ir.Constant(self.int_type, 0), ir.Constant(self.int_type, 2)]) + tgt_node_name_len = self.builder.load(tgt_node_name_len_ptr) - needs_resize = self.builder.icmp_signed('>=', current_count, current_capacity) + adj_list_void = self.builder.load(adj_list_ptr) + adj_list_exists = self.builder.icmp_signed('!=', adj_list_void, ir.Constant(self.void_ptr, None)) + + check_duplicates_block = self.builder.block.parent.append_basic_block(name="check_duplicates") + proceed_add_block = self.builder.block.parent.append_basic_block(name="proceed_add") + self.builder.cbranch(adj_list_exists, check_duplicates_block, proceed_add_block) + + self.builder.position_at_end(check_duplicates_block) + adj_list_typed = self.builder.bitcast(adj_list_void, self.node_type.as_pointer().as_pointer()) + dup_i = self.builder.alloca(self.int_type, name="dup_check_i") + self.builder.store(ir.Constant(self.int_type, 0), dup_i) + + dup_loop_block = self.builder.block.parent.append_basic_block(name="dup_check_loop") + dup_check_block = self.builder.block.parent.append_basic_block(name="dup_check_node") + dup_next_block = self.builder.block.parent.append_basic_block(name="dup_next") + self.builder.branch(dup_loop_block) + + self.builder.position_at_end(dup_loop_block) + dup_i_val = self.builder.load(dup_i) + current_count_loop = self.builder.load(adj_count_ptr) + dup_loop_condition = self.builder.icmp_signed('<', dup_i_val, current_count_loop) + self.builder.cbranch(dup_loop_condition, dup_check_block, proceed_add_block) + + self.builder.position_at_end(dup_check_block) + existing_node_ptr = self.builder.gep(adj_list_typed, [dup_i_val]) + existing_node = self.builder.load(existing_node_ptr) + existing_name_ptr = self.builder.gep(existing_node, [ir.Constant(self.int_type, 0), 
ir.Constant(self.int_type, 1)]) + existing_name = self.builder.load(existing_name_ptr) + existing_name_len_ptr = self.builder.gep(existing_node, [ir.Constant(self.int_type, 0), ir.Constant(self.int_type, 2)]) + existing_name_len = self.builder.load(existing_name_len_ptr) + + len_match = self.builder.icmp_signed('==', existing_name_len, tgt_node_name_len) + dup_content_check_block = self.builder.block.parent.append_basic_block(name="dup_content_check") + self.builder.cbranch(len_match, dup_content_check_block, dup_next_block) + + self.builder.position_at_end(dup_content_check_block) + names_match = self._compare_strings(existing_name, tgt_node_name, tgt_node_name_len) + self.builder.cbranch(names_match, proceed_add_block, dup_next_block) + + self.builder.position_at_end(dup_next_block) + next_dup_i = self.builder.add(dup_i_val, ir.Constant(self.int_type, 1)) + self.builder.store(next_dup_i, dup_i) + self.builder.branch(dup_loop_block) + + self.builder.position_at_end(proceed_add_block) + current_capacity = self.builder.load(adj_cap_ptr) + current_count_final = self.builder.load(adj_count_ptr) + needs_resize = self.builder.icmp_signed('>=', current_count_final, current_capacity) resize_adj_block = self.builder.block.parent.append_basic_block(name="resize_adj") add_adj_block = self.builder.block.parent.append_basic_block(name="add_adj") - self.builder.cbranch(needs_resize, resize_adj_block, add_adj_block) self.builder.position_at_end(resize_adj_block) @@ -569,24 +618,18 @@ def _add_to_adjacency_list(self, src_node_ptr, tgt_node_ptr): ir.Constant(self.int_type, 1), self.builder.mul(current_capacity, ir.Constant(self.int_type, 2)) ) - - target_data = self._get_target_data() - ptr_type = self.node_type.as_pointer() ptr_size = ir.Constant(self.int64_type, self._get_pointer_size()) - new_size_bytes = self.builder.mul(self.builder.zext(new_capacity, self.int64_type), ptr_size) new_array_mem = self.builder.call(self.malloc_func, [new_size_bytes]) old_adj_list = 
self.builder.load(adj_list_ptr) - copy_needed = self.builder.icmp_signed('>', current_count, ir.Constant(self.int_type, 0)) - + copy_needed = self.builder.icmp_signed('>', current_count_final, ir.Constant(self.int_type, 0)) copy_block = self.builder.block.parent.append_basic_block(name="copy_existing") store_block = self.builder.block.parent.append_basic_block(name="store_new_array") - self.builder.cbranch(copy_needed, copy_block, store_block) self.builder.position_at_end(copy_block) - old_size_bytes = self.builder.mul(self.builder.zext(current_count, self.int64_type), ptr_size) + old_size_bytes = self.builder.mul(self.builder.zext(current_count_final, self.int64_type), ptr_size) self.builder.call(self.memcpy_func, [new_array_mem, old_adj_list, old_size_bytes]) self.builder.call(self.free_func, [old_adj_list]) self.builder.branch(store_block) @@ -599,11 +642,8 @@ def _add_to_adjacency_list(self, src_node_ptr, tgt_node_ptr): self.builder.position_at_end(add_adj_block) adj_array = self.builder.load(adj_list_ptr) adj_array_typed = self.builder.bitcast(adj_array, self.node_type.as_pointer().as_pointer()) - - current_count_final = self.builder.load(adj_count_ptr) target_addr = self.builder.gep(adj_array_typed, [current_count_final]) self.builder.store(tgt_node_ptr, target_addr) - new_count = self.builder.add(current_count_final, ir.Constant(self.int_type, 1)) self.builder.store(new_count, adj_count_ptr) @@ -640,29 +680,32 @@ def _create_hash_insert(self): self.builder.ret(ir.Constant(self.int_type, 0)) def _create_is_adjacent(self): - is_adj_type = ir.FunctionType(self.bool_type, - [self.graph_type.as_pointer(), self.char_ptr, self.int_type, self.char_ptr, self.int_type]) + is_adj_type = ir.FunctionType( + self.bool_type, + [self.graph_type.as_pointer(), self.char_ptr, self.int_type, + self.char_ptr, self.int_type] + ) self.is_adjacent = ir.Function(self.module, is_adj_type, name="is_adjacent") - entry_block = self.is_adjacent.append_basic_block(name="entry") - 
node1_found_block = self.is_adjacent.append_basic_block(name="node1_found") - check_adjacency_block = self.is_adjacent.append_basic_block(name="check_adjacency") - search_adj_block = self.is_adjacent.append_basic_block(name="search_adjacency") - adj_loop_block = self.is_adjacent.append_basic_block(name="adj_search_loop") - adj_check_block = self.is_adjacent.append_basic_block(name="adj_check_node") - adj_next_block = self.is_adjacent.append_basic_block(name="adj_next") - adj_loop_end_block = self.is_adjacent.append_basic_block(name="adj_loop_end") - true_block = self.is_adjacent.append_basic_block(name="return_true") - false_block = self.is_adjacent.append_basic_block(name="return_false") + entry_block = self.is_adjacent.append_basic_block("entry") + node1_found_block = self.is_adjacent.append_basic_block("node1_found") + check_adjacency_block = self.is_adjacent.append_basic_block("check_adjacency") + search_adj_block = self.is_adjacent.append_basic_block("search_adj") + adj_loop_block = self.is_adjacent.append_basic_block("adj_loop") + adj_check_block = self.is_adjacent.append_basic_block("adj_check") + adj_next_block = self.is_adjacent.append_basic_block("adj_next") + content_check_block = self.is_adjacent.append_basic_block("content_check") + adj_loop_end_block = self.is_adjacent.append_basic_block("adj_loop_end") + true_block = self.is_adjacent.append_basic_block("return_true") + false_block = self.is_adjacent.append_basic_block("return_false") self.builder.position_at_end(entry_block) - graph_ptr, node1_name, node1_name_len, node2_name, node2_name_len = self.is_adjacent.args node_map_ptr = self.builder.gep(graph_ptr, [ir.Constant(self.int_type, 0), ir.Constant(self.int_type, 3)]) node_map = self.builder.load(node_map_ptr) - node1_void = self.builder.call(self.hash_lookup, [node_map, node1_name, node1_name_len]) + node1_void = self.builder.call(self.hash_lookup, [node_map, node1_name, node1_name_len]) node1_exists = self.builder.icmp_signed('!=', node1_void, 
ir.Constant(self.void_ptr, None)) self.builder.cbranch(node1_exists, node1_found_block, false_block) @@ -684,40 +727,50 @@ def _create_is_adjacent(self): adj_exists = self.builder.icmp_signed('!=', adj_list_void, ir.Constant(self.void_ptr, None)) count_positive = self.builder.icmp_signed('>', adj_count, ir.Constant(self.int_type, 0)) should_search = self.builder.and_(adj_exists, count_positive) - self.builder.cbranch(should_search, search_adj_block, false_block) self.builder.position_at_end(search_adj_block) adj_array_typed = self.builder.bitcast(adj_list_void, self.node_type.as_pointer().as_pointer()) - i = self.builder.alloca(self.int_type, name="adj_search_i") + i = self.builder.alloca(self.int_type, name="i") self.builder.store(ir.Constant(self.int_type, 0), i) self.builder.branch(adj_loop_block) self.builder.position_at_end(adj_loop_block) i_val = self.builder.load(i) - loop_condition = self.builder.icmp_signed('<', i_val, adj_count) - self.builder.cbranch(loop_condition, adj_check_block, adj_loop_end_block) + loop_cond = self.builder.icmp_signed('<', i_val, adj_count) + self.builder.cbranch(loop_cond, adj_check_block, adj_loop_end_block) self.builder.position_at_end(adj_check_block) entry_ptr = self.builder.gep(adj_array_typed, [i_val]) adj_node = self.builder.load(entry_ptr) - nodes_match = self.builder.icmp_signed('==', adj_node, node2_ptr) - self.builder.cbranch(nodes_match, true_block, adj_next_block) + + adj_node_name_ptr = self.builder.gep(adj_node, [ir.Constant(self.int_type, 0), ir.Constant(self.int_type, 1)]) + adj_node_name = self.builder.load(adj_node_name_ptr) + adj_node_name_len_ptr = self.builder.gep(adj_node, [ir.Constant(self.int_type, 0), ir.Constant(self.int_type, 2)]) + adj_node_name_len = self.builder.load(adj_node_name_len_ptr) + + len_match = self.builder.icmp_signed('==', adj_node_name_len, node2_name_len) + self.builder.cbranch(len_match, content_check_block, adj_next_block) + + self.builder.position_at_end(content_check_block) + 
names_match = self._compare_strings(adj_node_name, node2_name, node2_name_len) + self.builder.cbranch(names_match, true_block, adj_next_block) self.builder.position_at_end(adj_next_block) next_i = self.builder.add(i_val, ir.Constant(self.int_type, 1)) self.builder.store(next_i, i) self.builder.branch(adj_loop_block) - self.builder.position_at_end(true_block) - self.builder.ret(ir.Constant(self.bool_type, 1)) - self.builder.position_at_end(adj_loop_end_block) self.builder.ret(ir.Constant(self.bool_type, 0)) + self.builder.position_at_end(true_block) + self.builder.ret(ir.Constant(self.bool_type, 1)) + self.builder.position_at_end(false_block) self.builder.ret(ir.Constant(self.bool_type, 0)) + def _create_remove_vertex(self): remove_vertex_type = ir.FunctionType(self.int_type, @@ -964,6 +1017,11 @@ def _remove_from_all_adjacency_lists(self, graph_ptr, vertex_name, vertex_name_l self.builder.position_at_end(done_adj_cleanup) def _remove_from_adjacency_list(self, src_node_ptr, tgt_node_ptr): + tgt_node_name_ptr = self.builder.gep(tgt_node_ptr, [ir.Constant(self.int_type, 0), ir.Constant(self.int_type, 1)]) + tgt_node_name = self.builder.load(tgt_node_name_ptr) + tgt_node_name_len_ptr = self.builder.gep(tgt_node_ptr, [ir.Constant(self.int_type, 0), ir.Constant(self.int_type, 2)]) + tgt_node_name_len = self.builder.load(tgt_node_name_len_ptr) + adj_list_ptr = self.builder.gep(src_node_ptr, [ir.Constant(self.int_type, 0), ir.Constant(self.int_type, 3)]) adj_count_ptr = self.builder.gep(src_node_ptr, [ir.Constant(self.int_type, 0), ir.Constant(self.int_type, 4)]) @@ -993,8 +1051,20 @@ def _remove_from_adjacency_list(self, src_node_ptr, tgt_node_ptr): adj_entry_ptr = self.builder.gep(adj_list_typed, [i_val]) adj_node = self.builder.load(adj_entry_ptr) - is_target = self.builder.icmp_signed('==', adj_node, tgt_node_ptr) - self.builder.cbranch(is_target, adj_found_block, adj_next_block) + adj_node_name_ptr = self.builder.gep(adj_node, [ir.Constant(self.int_type, 0), 
ir.Constant(self.int_type, 1)]) + adj_node_name = self.builder.load(adj_node_name_ptr) + adj_node_name_len_ptr = self.builder.gep(adj_node, [ir.Constant(self.int_type, 0), ir.Constant(self.int_type, 2)]) + adj_node_name_len = self.builder.load(adj_node_name_len_ptr) + + len_match = self.builder.icmp_signed('==', adj_node_name_len, tgt_node_name_len) + + content_check_block = self.builder.block.parent.append_basic_block(name="adj_content_check") + + self.builder.cbranch(len_match, content_check_block, adj_next_block) + + self.builder.position_at_end(content_check_block) + names_match = self._compare_strings(adj_node_name, tgt_node_name, tgt_node_name_len) + self.builder.cbranch(names_match, adj_found_block, adj_next_block) self.builder.position_at_end(adj_found_block) shift_i = self.builder.alloca(self.int_type, name="adj_shift_i") From ad3b7bb1335501683a9673ce394afbc0df291bd2 Mon Sep 17 00:00:00 2001 From: Prerak Singh Date: Sun, 28 Sep 2025 18:55:46 +0530 Subject: [PATCH 19/19] bug fix --- pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py | 5 +++++ pydatastructs/graphs/tests/test_adjacency_list.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py b/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py index 47fbc8e88..3ebac77ec 100644 --- a/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py +++ b/pydatastructs/graphs/_backend/cpp/llvm_adjacency_list.py @@ -1,6 +1,7 @@ import llvmlite.binding as llvm import llvmlite.ir as ir from llvmlite import ir +import platform import ctypes from ctypes import Structure, POINTER, c_void_p, c_int, c_char_p, c_double @@ -35,6 +36,10 @@ def __init__(self): self._create_function_declarations() self._create_graph_functions() + if platform.system().lower() == "linux": + self.module.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + else: + self.module.data_layout = str(self.target_data) def 
_get_target_triple(self): import platform diff --git a/pydatastructs/graphs/tests/test_adjacency_list.py b/pydatastructs/graphs/tests/test_adjacency_list.py index e8c92c9eb..24ef96321 100644 --- a/pydatastructs/graphs/tests/test_adjacency_list.py +++ b/pydatastructs/graphs/tests/test_adjacency_list.py @@ -84,7 +84,7 @@ def test_adjacency_list(): g3 = Graph('a','b',implementation = 'adjacency_list', backend = Backend.LLVM) g3.add_edge('a', 'b') - #assert g3.is_adjacent('a','b') is True + assert g3.is_adjacent('a','b') is True g3.add_vertex('c') g3.add_edge('a','c') assert g3.is_adjacent('a','c') is True