diff --git a/bitcode/_cl_print_str.cl b/bitcode/_cl_print_str.cl index fbcd2d44c..682c4789b 100644 --- a/bitcode/_cl_print_str.cl +++ b/bitcode/_cl_print_str.cl @@ -20,7 +20,7 @@ * DEALINGS IN THE SOFTWARE. */ -static void __attribute__((used)) _cl_print_str(__generic const char *S) { +void _cl_print_str(__generic const char *S) { if (S == 0) { return; /* Match AMD/nvidia: null %s prints nothing, format provides newline */ } diff --git a/bitcode/texture.cl b/bitcode/texture.cl index 80a4ede4a..032b2b5e4 100644 --- a/bitcode/texture.cl +++ b/bitcode/texture.cl @@ -43,47 +43,47 @@ uint4 _chip_tex2du(hipTextureObject_t TextureObject, float2 Pos); // ^^^ DECLARATIONS INTENTIONALLY WITHOUT DEFINITION ^^^ -static int4 __attribute__((used)) +int4 _chip_tex1dfetchi_impl(image1d_t I, sampler_t S, int Pos) { return read_imagei(I, S, Pos); } -static uint4 __attribute__((used)) +uint4 _chip_tex1dfetchu_impl(image1d_t I, sampler_t S, int Pos) { return read_imageui(I, S, Pos); } -static float4 __attribute__((used)) +float4 _chip_tex1dfetchf_impl(image1d_t I, sampler_t S, int Pos) { return read_imagef(I, S, Pos); } -static int4 __attribute__((used)) +int4 _chip_tex1di_impl(image1d_t I, sampler_t S, float Pos) { return read_imagei(I, S, Pos); } -static uint4 __attribute__((used)) +uint4 _chip_tex1du_impl(image1d_t I, sampler_t S, float Pos) { return read_imageui(I, S, Pos); } -static float4 __attribute__((used)) +float4 _chip_tex1df_impl(image1d_t I, sampler_t S, float Pos) { return read_imagef(I, S, Pos); } -static float4 __attribute__((used)) +float4 _chip_tex2df_impl(image2d_t I, sampler_t S, float2 Pos) { return read_imagef(I, S, Pos); } -static int4 __attribute__((used)) +int4 _chip_tex2di_impl(image2d_t I, sampler_t S, float2 Pos) { return read_imagei(I, S, Pos); } -static uint4 __attribute__((used)) +uint4 _chip_tex2du_impl(image2d_t I, sampler_t S, float2 Pos) { return read_imageui(I, S, Pos); } diff --git a/llvm_passes/HipPrintf.cpp b/llvm_passes/HipPrintf.cpp index 6331792a5..2d81459e2 100644 --- a/llvm_passes/HipPrintf.cpp +++ b/llvm_passes/HipPrintf.cpp @@ -314,21 +314,81 @@ HipPrintfToOpenCLPrintfPass::getOrCreateStrLiteralArg(const std::string &Str, Function *HipPrintfToOpenCLPrintfPass::getOrCreatePrintStringF() { if (GlobalValue *OldPrintStrF = - M_->getNamedValue(ORIG_PRINT_STRING_FUNC_NAME)) - return cast(OldPrintStrF); + M_->getNamedValue(ORIG_PRINT_STRING_FUNC_NAME)) { + auto *Existing = cast(OldPrintStrF); + if (!Existing->isDeclaration()) + return Existing; + // Declaration exists (e.g. inserted on a prior call before we got + // to define a body); fall through and define the body now. + } - auto *Int8Ty = IntegerType::get(M_->getContext(), 8); + auto &Ctx = M_->getContext(); + auto *Int8Ty = IntegerType::get(Ctx, 8); + auto *Int32Ty = IntegerType::get(Ctx, 32); + auto *VoidTy = Type::getVoidTy(Ctx); PointerType *GenericCStrArgT = PointerType::get(Int8Ty, SPIRV_OPENCL_GENERIC_AS); + PointerType *ConstStrPtrT = + PointerType::get(Int8Ty, SPIRV_OPENCL_CONSTANT_AS); - FunctionType *PrintStrFTy = FunctionType::get( - Type::getVoidTy(M_->getContext()), {GenericCStrArgT}, false); - - FunctionCallee PrintStrF = - M_->getOrInsertFunction(ORIG_PRINT_STRING_FUNC_NAME, PrintStrFTy); - cast(PrintStrF.getCallee()) - ->setCallingConv(llvm::CallingConv::SPIR_FUNC); - return cast(PrintStrF.getCallee()); + FunctionType *PrintStrFTy = + FunctionType::get(VoidTy, {GenericCStrArgT}, false); + Function *F = cast( + M_->getOrInsertFunction(ORIG_PRINT_STRING_FUNC_NAME, PrintStrFTy) + .getCallee()); + F->setCallingConv(llvm::CallingConv::SPIR_FUNC); + F->setLinkage(llvm::GlobalValue::InternalLinkage); + + // Define the body inline so the kernel module is self-contained for %s + // printf support. The historical implementation in bitcode/_cl_print_str.cl + // required `static __attribute__((used))`, which forced an `@llvm.used` + // entry into hipspv.bc. That collided with `@llvm.used` from HIP TUs + // (different element address space) at `-mlink-builtin-bitcode` time and + // broke any HIP code that also uses `__attribute__((used))` (rocThrust). + // + // Equivalent C: + // void _cl_print_str(__generic const char *S) { + // if (S == 0) return; + // unsigned Pos = 0; + // char C; + // while ((C = S[Pos]) != 0) { printf("%c", C); ++Pos; } + // } + BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F); + BasicBlock *Loop = BasicBlock::Create(Ctx, "loop", F); + BasicBlock *Body = BasicBlock::Create(Ctx, "body", F); + BasicBlock *Exit = BasicBlock::Create(Ctx, "exit", F); + + Argument *S = F->getArg(0); + + FunctionType *PrintfTy = + FunctionType::get(Int32Ty, {ConstStrPtrT}, /*isVarArg=*/true); + Function *Printf = cast( + M_->getOrInsertFunction("printf", PrintfTy).getCallee()); + + IRBuilder<> B(Entry); + Value *IsNull = B.CreateICmpEQ(S, ConstantPointerNull::get(GenericCStrArgT)); + B.CreateCondBr(IsNull, Exit, Loop); + + B.SetInsertPoint(Loop); + PHINode *Pos = B.CreatePHI(Int32Ty, 2); + Pos->addIncoming(ConstantInt::get(Int32Ty, 0), Entry); + Value *CharPtr = B.CreateGEP(Int8Ty, S, Pos); + Value *C = B.CreateLoad(Int8Ty, CharPtr); + Value *IsZero = B.CreateICmpEQ(C, ConstantInt::get(Int8Ty, 0)); + B.CreateCondBr(IsZero, Exit, Body); + + B.SetInsertPoint(Body); + Constant *PercentC = getOrCreateStrLiteralArg("%c", B); + CallInst *PrintfCall = B.CreateCall(Printf, {PercentC, C}); + PrintfCall->setCallingConv(llvm::CallingConv::SPIR_FUNC); + Value *PosNext = B.CreateAdd(Pos, ConstantInt::get(Int32Ty, 1)); + Pos->addIncoming(PosNext, Body); + B.CreateBr(Loop); + + B.SetInsertPoint(Exit); + B.CreateRetVoid(); + + return F; } // Get called function from 'CI' call or return nullptr the call is indirect. diff --git a/tests/runtime/TestFixLlvmUsedAddrspace.hip b/tests/runtime/TestFixLlvmUsedAddrspace.hip new file mode 100644 index 000000000..6467825a6 --- /dev/null +++ b/tests/runtime/TestFixLlvmUsedAddrspace.hip @@ -0,0 +1,35 @@ +// Reproduces: @llvm.used address space mismatch between HIP module (ptr addrspace(4)) +// and chipStar device library (ptr addrspace(0)) causes "Appending variables with +// different element types" link error. +// +// The __attribute__((used)) __device__ static variable creates an @llvm.used entry +// with ptr addrspace(4) in the HIP compilation. The device library (hipspv-spirv64.bc) +// has @llvm.used entries with ptr (addrspace 0) from its OpenCL C compilation. +// LLVM's linker refuses to merge them. + +#include +#include + +// This triggers @llvm.used with ptr addrspace(4) — same pattern as +// rocThrust's runtime_static_assert.h +__attribute__((used)) __device__ static int* device_ptr = nullptr; + +__global__ void kernel(int* out) { + out[threadIdx.x] = threadIdx.x; +} + +int main() { + int *d, h[32]; + hipMalloc(&d, 128); + kernel<<<1, 32>>>(d); + hipDeviceSynchronize(); + hipMemcpy(h, d, 128, hipMemcpyDeviceToHost); + hipFree(d); + + bool pass = true; + for (int i = 0; i < 32; i++) { + if (h[i] != i) { pass = false; break; } + } + printf("%s\n", pass ? "PASS" : "FAIL"); + return pass ? 0 : 1; +}