Skip to content

Commit b1afe03

Browse files
authored
Add uninitialized multi-versioning trampoline for autoinit support (#60171)
This adds a single-use autoinit trampoline for multiversioning-aliased functions. "First call" sequence: trampoline -> autoinit trampoline -> arch-specific call. Subsequent calls: trampoline -> arch-specific call.
1 parent eba48a2 commit b1afe03

File tree

3 files changed

+80
-40
lines changed

3 files changed

+80
-40
lines changed

src/llvm-multiversioning.cpp

Lines changed: 64 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,8 @@ struct CloneCtx {
378378
void clone_partial(Group &grp, Target &tgt);
379379
uint32_t get_func_id(Function *F) const;
380380
std::pair<uint32_t,GlobalVariable*> get_reloc_slot(Function *F) const;
381+
382+
Function *create_trampoline(Function *F, GlobalVariable *slot, bool autoinit=false);
381383
void rewrite_alias(GlobalAlias *alias, Function* F);
382384

383385
MDNode *tbaa_const;
@@ -493,6 +495,53 @@ void CloneCtx::prepare_vmap(ValueToValueMapTy &vmap)
493495
}
494496
}
495497

498+
// Build a trampoline function in module `M` that forwards to whatever function
// pointer is currently stored in `slot`.
//
// F        - function whose type and attributes the trampoline copies.
// slot     - global variable the target function pointer is loaded from.
// autoinit - when true, the trampoline first calls the function named by
//            XSTR(jl_autoinit_and_adopt_thread) before loading from `slot`.
//
// Returns the new function. It is created unnamed, with external linkage,
// hidden visibility and dso_local set; naming it (and overriding linkage or
// visibility if needed) is left to the caller.
Function *CloneCtx::create_trampoline(Function *F, GlobalVariable *slot, bool autoinit)
499+
{
500+
Function *trampoline =
501+
Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage, "", &M);
502+
503+
trampoline->copyAttributesFrom(F);
504+
trampoline->setVisibility(GlobalValue::HiddenVisibility);
505+
trampoline->setDSOLocal(true);
506+
507+
// drop multiversioning attributes
508+
trampoline->removeFnAttr("julia.mv.reloc");
509+
trampoline->removeFnAttr("julia.mv.clones");
510+
511+
// The whole trampoline body lives in a single basic block named "top".
auto BB = BasicBlock::Create(F->getContext(), "top", trampoline);
512+
IRBuilder<> irbuilder(BB);
513+
514+
// Optional prologue: call the auto-initialization entry point. It is declared
// here as taking no arguments and returning a pointer in address space 0; the
// returned value is not used.
if (autoinit) {
515+
irbuilder.CreateCall(F->getParent()->getOrInsertFunction(
516+
XSTR(jl_autoinit_and_adopt_thread),
517+
PointerType::get(F->getContext(), 0)
518+
));
519+
}
520+
521+
// Load the target function pointer from the slot, tagged with the `tbaa_const`
// TBAA node and with (empty) !invariant.load metadata.
auto ptr = irbuilder.CreateLoad(F->getType(), slot);
522+
ptr->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const);
523+
ptr->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(F->getContext(), None));
524+
525+
// Forward every formal argument of the trampoline, in order, to the loaded pointer.
SmallVector<Value *, 0> Args;
526+
for (auto &arg : trampoline->args())
527+
Args.push_back(&arg);
528+
auto call = irbuilder.CreateCall(F->getFunctionType(), ptr, ArrayRef<Value *>(Args));
529+
// Vararg functions get a `musttail` call; everything else gets a plain `tail`
// hint. NOTE(review): presumably musttail is required so the variadic
// arguments (which are not in `Args`) are forwarded too -- confirm against
// the LLVM LangRef. The assert records that this path is not supported on
// ARM/PPC targets.
if (F->isVarArg()) {
530+
assert(!TT.isARM() && !TT.isPPC() && "musttail not supported on ARM/PPC!");
531+
call->setTailCallKind(CallInst::TCK_MustTail);
532+
} else {
533+
call->setTailCallKind(CallInst::TCK_Tail);
534+
535+
}
536+
537+
// Return the call's result, or nothing when F returns void.
if (F->getReturnType() == Type::getVoidTy(F->getContext()))
538+
irbuilder.CreateRetVoid();
539+
else
540+
irbuilder.CreateRet(call);
541+
542+
return trampoline;
543+
}
544+
496545
void CloneCtx::prepare_slots()
497546
{
498547
for (auto &F : orig_funcs) {
@@ -507,7 +556,12 @@ void CloneCtx::prepare_slots()
507556
else {
508557
auto id = get_func_id(F);
509558
const_relocs[id] = GV;
510-
GV->setInitializer(Constant::getNullValue(F->getType()));
559+
560+
// Initialize with a single-use trampoline that calls `jl_autoinit_and_adopt_thread`,
561+
// so that auto-initialization works with multi-versioned entrypoints.
562+
Function *trampoline = create_trampoline(F, GV, /* autoinit */ true);
563+
trampoline->setName(F->getName() + ".autoinit_trampoline");
564+
GV->setInitializer(trampoline);
511565
}
512566
}
513567
}
@@ -665,45 +719,21 @@ void CloneCtx::rewrite_alias(GlobalAlias *alias, Function *F)
665719
{
666720
assert(!is_vector(F->getFunctionType()));
667721

668-
Function *trampoline =
669-
Function::Create(F->getFunctionType(), alias->getLinkage(), "", &M);
670-
trampoline->copyAttributesFrom(F);
671-
trampoline->takeName(alias);
672-
trampoline->setVisibility(alias->getVisibility());
673-
trampoline->setDSOLocal(alias->isDSOLocal());
674-
// drop multiversioning attributes, add alias attribute for testing purposes
675-
trampoline->removeFnAttr("julia.mv.reloc");
676-
trampoline->removeFnAttr("julia.mv.clones");
677-
trampoline->addFnAttr("julia.mv.alias");
678-
trampoline->setDLLStorageClass(alias->getDLLStorageClass());
679-
alias->eraseFromParent();
680-
681722
uint32_t id;
682723
GlobalVariable *slot;
683724
std::tie(id, slot) = get_reloc_slot(F);
725+
assert(slot);
684726

685-
auto BB = BasicBlock::Create(F->getContext(), "top", trampoline);
686-
IRBuilder<> irbuilder(BB);
727+
Function *trampoline = create_trampoline(F, slot, /* autoinit */ false);
728+
trampoline->addFnAttr("julia.mv.alias"); // add alias attribute for testing purposes
687729

688-
auto ptr = irbuilder.CreateLoad(F->getType(), slot);
689-
ptr->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const);
690-
ptr->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(F->getContext(), None));
691-
692-
SmallVector<Value *, 0> Args;
693-
for (auto &arg : trampoline->args())
694-
Args.push_back(&arg);
695-
auto call = irbuilder.CreateCall(F->getFunctionType(), ptr, ArrayRef<Value *>(Args));
696-
if (F->isVarArg()) {
697-
assert(!TT.isARM() && !TT.isPPC() && "musttail not supported on ARM/PPC!");
698-
call->setTailCallKind(CallInst::TCK_MustTail);
699-
} else {
700-
call->setTailCallKind(CallInst::TCK_Tail);
701-
}
730+
trampoline->takeName(alias);
731+
trampoline->setLinkage(alias->getLinkage());
732+
trampoline->setVisibility(alias->getVisibility());
733+
trampoline->setDSOLocal(alias->isDSOLocal());
734+
trampoline->setDLLStorageClass(alias->getDLLStorageClass());
702735

703-
if (F->getReturnType() == Type::getVoidTy(F->getContext()))
704-
irbuilder.CreateRetVoid();
705-
else
706-
irbuilder.CreateRet(call);
736+
alias->eraseFromParent();
707737
}
708738

709739
void CloneCtx::fix_gv_uses()

test/llvmpasses/multiversioning-clone-only.ll

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
; CHECK: @jl_fvar_idxs = hidden constant [1 x i32] zeroinitializer
88
; CHECK: @jl_gvar_idxs = hidden constant [0 x i32] zeroinitializer
99
; OPAQUE: @subtarget_cloned_gv = hidden global ptr null
10-
; OPAQUE: @subtarget_cloned.reloc_slot = hidden global ptr null
10+
; OPAQUE: @subtarget_cloned.reloc_slot = hidden global ptr @subtarget_cloned.autoinit_trampoline
1111
; CHECK: @jl_fvar_count = hidden constant i64 1
1212
; OPAQUE: @jl_fvar_ptrs = hidden global [1 x ptr] [ptr @subtarget_cloned]
1313
; CHECK: @jl_clone_slots = hidden constant [5 x i32]
@@ -57,7 +57,7 @@ define noundef i32 @subtarget_cloned(i32 noundef %0) #2 {
5757
; COM: should fixup this callsite since 2 is cloned for a subtarget
5858
; CHECK: define{{.*}}@call_subtarget_cloned({{.*}}#[[CALL_SUBTARGET_CLONED_DEFAULT_ATTRS:[0-9]+]]
5959
; CHECK-NEXT: [[FUNC_PTR:%[0-9]+]] = load{{.*}}@subtarget_cloned.reloc_slot{{.*}}!tbaa ![[TBAA_CONST_METADATA:[0-9]+]], !invariant.load
60-
; CHECK-NEXT: call{{.*}}[[FUNC_PTR]]
60+
; CHECK-NEXT: call{{.*}}[[FUNC_PTR]]({{.*}})
6161
; CHECK: ret i32
6262
define noundef i32 @call_subtarget_cloned(i32 noundef %0) #3 {
6363
%2 = call noundef i32 @subtarget_cloned(i32 noundef %0)
@@ -66,13 +66,23 @@ define noundef i32 @call_subtarget_cloned(i32 noundef %0) #3 {
6666

6767
; CHECK: define{{.*}}@call_subtarget_cloned_but_not_cloned({{.*}}#[[BORING_DEFAULT_ATTRS]]
6868
; CHECK-NEXT: [[FUNC_PTR:%[0-9]+]] = load{{.*}}@subtarget_cloned.reloc_slot{{.*}}!tbaa ![[TBAA_CONST_METADATA]], !invariant.load
69-
; CHECK-NEXT: call{{.*}}[[FUNC_PTR]]
69+
; CHECK-NEXT: call{{.*}}[[FUNC_PTR]]({{.*}})
7070
; CHECK: ret i32
7171
define noundef i32 @call_subtarget_cloned_but_not_cloned(i32 noundef %0) #0 {
7272
%2 = call noundef i32 @subtarget_cloned(i32 noundef %0)
7373
ret i32 %2
7474
}
7575

76+
; COM: check that the autoinit trampoline is generated correctly
77+
; CHECK: define{{.*}}@subtarget_cloned.autoinit_trampoline({{.*}}
78+
; CHECK-NEXT: top:
79+
; CHECK-NEXT: call ptr @ijl_autoinit_and_adopt_thread()
80+
; CHECK-NEXT: [[FUNC_PTR:%[0-9]+]] = load ptr, ptr @subtarget_cloned.reloc_slot{{.*}}!tbaa ![[TBAA_CONST_METADATA]], !invariant.load
81+
; CHECK-NEXT: call{{.*}}[[FUNC_PTR]]({{.*}})
82+
; CHECK: ret i32
83+
84+
declare ptr @ijl_autoinit_and_adopt_thread()
85+
7686
; CHECK: define{{.*}}@boring.1({{.*}}#[[BORING_CLONEALL_ATTRS:[0-9]+]]
7787
; CHECK-NEXT: ret i32 %0
7888

@@ -106,10 +116,10 @@ define noundef i32 @call_subtarget_cloned_but_not_cloned(i32 noundef %0) #0 {
106116
; CHECK-NOT: @subtarget_cloned_but_not_cloned.2
107117

108118
; COM: check for alias being rewritten to a function trampoline
109-
; CHECK: define{{.*}}@subtarget_cloned_aliased{{.*}}#[[SUBTARGET_ALIASED_ATTRS:[0-9]+]]
119+
; CHECK: define{{.*}}@subtarget_cloned_aliased{{[^.]*}}#[[SUBTARGET_ALIASED_ATTRS:[0-9]+]]
110120
; CHECK-NOT: }
111121
; CHECK: [[FUNC_PTR:%[0-9]+]] = load{{.*}}@subtarget_cloned.reloc_slot{{.*}}!tbaa ![[TBAA_CONST_METADATA]], !invariant.load
112-
; CHECK-NEXT: call{{.*}}[[FUNC_PTR]]
122+
; CHECK-NEXT: call{{.*}}[[FUNC_PTR]]({{.*}})
113123
; CHECK: ret i32
114124

115125
; CHECK: attributes #[[BORING_DEFAULT_ATTRS]]

test/llvmpasses/multiversioning-x86.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
; OPAQUE: @jl_gvar_ptrs = global [0 x ptr] zeroinitializer, align 8
1212
; CHECK: @jl_fvar_idxs = hidden constant [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4], align 8
1313
; CHECK: @jl_gvar_idxs = hidden constant [0 x i32] zeroinitializer, align 8
14-
; OPAQUE: @simd_test.reloc_slot = hidden global ptr null
14+
; OPAQUE: @simd_test.reloc_slot = hidden global ptr @simd_test.autoinit_trampoline
1515
; OPAQUE: @jl_fvar_ptrs = hidden global [5 x ptr] [ptr @boring, ptr @fastmath_test, ptr @loop_test, ptr @simd_test, ptr @simd_test_call]
1616
; OPAQUE: @jl_clone_slots = hidden constant [3 x i32] [i32 1, i32 3, i32 trunc (i64 sub (i64 ptrtoint (ptr @simd_test.reloc_slot to i64), i64 ptrtoint (ptr @jl_clone_slots to i64)) to i32)]
1717
; CHECK: @jl_clone_idxs = hidden constant [10 x i32] [i32 -2147483647, i32 3, i32 -2147483647, i32 3, i32 4, i32 1, i32 1, i32 2, i32 -2147483645, i32 4]

0 commit comments

Comments
 (0)