diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index fd73314c9f84c..a2e6f2f4f4817 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -4869,6 +4869,32 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, emitArraySubscriptGEP(*this, Int8Ty, Addr.emitRawPointer(*this), ScaledIdx, false, SignedIndices, E->getExprLoc()); Addr = Address(EltPtr, OrigBaseElemTy, EltAlign); + } else if (getLangOpts().HLSL && + E->getType().getAddressSpace() == LangAS::hlsl_constant) { + // This is an array inside of a cbuffer. + Addr = EmitPointerWithAlignment(E->getBase(), &EltBaseInfo, &EltTBAAInfo); + + SmallVector Indices; + Indices.push_back(EmitIdxAfterBase(/*Promote*/true)); + + CharUnits ElementSize = getContext().getTypeSizeInChars(E->getType()); + CharUnits RowAlignedSize = ElementSize.alignTo(CharUnits::fromQuantity(16)); + + llvm::Type *EltTyToIndex = Addr.getElementType(); + if (RowAlignedSize > ElementSize) { + llvm::Type *Padding = CGM.getTargetCodeGenInfo().getHLSLPadding( + CGM, RowAlignedSize - ElementSize); + EltTyToIndex = llvm::StructType::get( + getLLVMContext(), {EltTyToIndex, Padding}, /*isPacked=*/true); + Indices.push_back(llvm::ConstantInt::get(Indices[0]->getType(), 0)); + } + + CharUnits EltAlign = + getArrayElementAlign(Addr.getAlignment(), Indices[0], RowAlignedSize); + llvm::Value *EltPtr = + emitArraySubscriptGEP(*this, EltTyToIndex, Addr.emitRawPointer(*this), + Indices, false, SignedIndices, E->getExprLoc()); + Addr = Address(EltPtr, Addr.getElementType(), EltAlign); } else if (const Expr *Array = isSimpleArrayDecayOperand(E->getBase())) { // If this is A[i] where A is an array, the frontend will have decayed the // base to be a ArrayToPointerDecay implicit cast. 
While correct, it is diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp index eee397f1f3d19..bd7d30b10d4d0 100644 --- a/clang/lib/CodeGen/CGExprAgg.cpp +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -2279,6 +2279,10 @@ void CodeGenFunction::EmitAggregateCopy(LValue Dest, LValue Src, QualType Ty, } } + if (getLangOpts().HLSL && Ty.getAddressSpace() == LangAS::hlsl_constant) + if (CGM.getHLSLRuntime().emitBufferCopy(*this, DestPtr, SrcPtr, Ty)) + return; + // Aggregate assignment turns into llvm.memcpy. This is almost valid per // C99 6.5.16.1p3, which states "If the value being stored in an object is // read from another object that overlaps in anyway the storage of the first diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp index ecab9336a9f82..bd86df9d68094 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.cpp +++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp @@ -13,10 +13,10 @@ //===----------------------------------------------------------------------===// #include "CGHLSLRuntime.h" -#include "Address.h" #include "CGDebugInfo.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" +#include "HLSLBufferLayoutBuilder.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Attrs.inc" @@ -26,6 +26,7 @@ #include "clang/AST/Type.h" #include "clang/Basic/TargetOptions.h" #include "clang/Frontend/FrontendDiagnostic.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Frontend/HLSL/RootSignatureMetadata.h" @@ -43,6 +44,8 @@ #include #include +#define DEBUG_TYPE "cghlslruntime" + using namespace clang; using namespace CodeGen; using namespace clang::hlsl; @@ -265,9 +268,9 @@ CGHLSLRuntime::convertHLSLSpecificType(const Type *T, assert(T->isHLSLSpecificType() && "Not an HLSL specific type!"); // Check if the target has a specific translation for this type first. 
- if (llvm::Type *TargetTy = + if (llvm::Type *LayoutTy = CGM.getTargetCodeGenInfo().getHLSLType(CGM, T, Packoffsets)) - return TargetTy; + return LayoutTy; llvm_unreachable("Generic handling of HLSL types is not supported."); } @@ -284,10 +287,8 @@ void CGHLSLRuntime::emitBufferGlobalsAndMetadata(const HLSLBufferDecl *BufDecl, // get the layout struct from constant buffer target type llvm::Type *BufType = BufGV->getValueType(); - llvm::Type *BufLayoutType = - cast(BufType)->getTypeParameter(0); llvm::StructType *LayoutStruct = cast( - cast(BufLayoutType)->getTypeParameter(0)); + cast(BufType)->getTypeParameter(0)); // Start metadata list associating the buffer global variable with its // constatns @@ -326,6 +327,9 @@ void CGHLSLRuntime::emitBufferGlobalsAndMetadata(const HLSLBufferDecl *BufDecl, continue; } + if (CGM.getTargetCodeGenInfo().isHLSLPadding(*ElemIt)) + ++ElemIt; + assert(ElemIt != LayoutStruct->element_end() && "number of elements in layout struct does not match"); llvm::Type *LayoutType = *ElemIt++; @@ -423,12 +427,11 @@ void CGHLSLRuntime::addBuffer(const HLSLBufferDecl *BufDecl) { if (BufDecl->hasValidPackoffset()) fillPackoffsetLayout(BufDecl, Layout); - llvm::TargetExtType *TargetTy = - cast(convertHLSLSpecificType( - ResHandleTy, BufDecl->hasValidPackoffset() ? &Layout : nullptr)); + llvm::Type *LayoutTy = convertHLSLSpecificType( + ResHandleTy, BufDecl->hasValidPackoffset() ? &Layout : nullptr); llvm::GlobalVariable *BufGV = new GlobalVariable( - TargetTy, /*isConstant*/ false, - GlobalValue::LinkageTypes::ExternalLinkage, PoisonValue::get(TargetTy), + LayoutTy, /*isConstant*/ false, + GlobalValue::LinkageTypes::ExternalLinkage, PoisonValue::get(LayoutTy), llvm::formatv("{0}{1}", BufDecl->getName(), BufDecl->isCBuffer() ? 
".cb" : ".tb"), GlobalValue::NotThreadLocal); @@ -454,7 +457,7 @@ void CGHLSLRuntime::addRootSignature( SignatureDecl->getRootElements(), nullptr, M); } -llvm::TargetExtType * +llvm::StructType * CGHLSLRuntime::getHLSLBufferLayoutType(const RecordType *StructType) { const auto Entry = LayoutTypes.find(StructType); if (Entry != LayoutTypes.end()) @@ -463,7 +466,7 @@ CGHLSLRuntime::getHLSLBufferLayoutType(const RecordType *StructType) { } void CGHLSLRuntime::addHLSLBufferLayoutType(const RecordType *StructType, - llvm::TargetExtType *LayoutTy) { + llvm::StructType *LayoutTy) { assert(getHLSLBufferLayoutType(StructType) == nullptr && "layout type for this struct already exist"); LayoutTypes[StructType] = LayoutTy; @@ -997,3 +1000,146 @@ std::optional CGHLSLRuntime::emitResourceArraySubscriptExpr( } return CGF.MakeAddrLValue(TmpVar, ResultTy, AlignmentSource::Decl); } + +namespace { +class HLSLBufferCopyEmitter { + CodeGenFunction &CGF; + Address DestPtr; + Address SrcPtr; + llvm::Type *LayoutTy = nullptr; + + SmallVector CurStoreIndices; + SmallVector CurLoadIndices; + + void emitCopyAtIndices(llvm::Type *FieldTy, llvm::ConstantInt *StoreIndex, + llvm::ConstantInt *LoadIndex) { + CurStoreIndices.push_back(StoreIndex); + CurLoadIndices.push_back(LoadIndex); + auto RestoreIndices = llvm::make_scope_exit([&]() { + CurStoreIndices.pop_back(); + CurLoadIndices.pop_back(); + }); + + if (processArray(FieldTy)) + return; + if (processBufferLayoutArray(FieldTy)) + return; + if (processStruct(FieldTy)) + return; + + // We have a scalar or vector element - emit a copy. 
+ CharUnits Align = CharUnits::fromQuantity( + CGF.CGM.getDataLayout().getABITypeAlign(FieldTy)); + Address SrcGEP = RawAddress( + CGF.Builder.CreateInBoundsGEP(LayoutTy, SrcPtr.getBasePointer(), + CurLoadIndices, "cbuf.src"), + FieldTy, Align, SrcPtr.isKnownNonNull()); + Address DestGEP = CGF.Builder.CreateInBoundsGEP( + DestPtr, CurStoreIndices, FieldTy, Align, "cbuf.dest"); + llvm::Value *Load = CGF.Builder.CreateLoad(SrcGEP, "cbuf.load"); + CGF.Builder.CreateStore(Load, DestGEP); + } + + bool processArray(llvm::Type *FieldTy) { + auto *AT = dyn_cast(FieldTy); + if (!AT) + return false; + + // If we have an array then there isn't any padding + // between elements. We just need to copy each element over. + for (unsigned I = 0, E = AT->getNumElements(); I < E; ++I) + emitCopyAtIndices(AT->getElementType(), + llvm::ConstantInt::get(CGF.SizeTy, I), + llvm::ConstantInt::get(CGF.SizeTy, I)); + return true; + } + + bool processBufferLayoutArray(llvm::Type *FieldTy) { + auto *ST = dyn_cast(FieldTy); + if (!ST || ST->getNumElements() != 2) + return false; + + auto *PaddedEltsTy = dyn_cast(ST->getElementType(0)); + if (!PaddedEltsTy) + return false; + + auto *PaddedTy = dyn_cast(PaddedEltsTy->getElementType()); + if (!PaddedTy || PaddedTy->getNumElements() != 2) + return false; + + if (!CGF.CGM.getTargetCodeGenInfo().isHLSLPadding( + PaddedTy->getElementType(1))) + return false; + + llvm::Type *ElementTy = ST->getElementType(1); + if (PaddedTy->getElementType(0) != ElementTy) + return false; + + // All but the last of the logical array elements are in the padded array. + unsigned NumElts = PaddedEltsTy->getNumElements() + 1; + + // Add an extra indirection to the load for the struct and walk the + // array prefix. + CurLoadIndices.push_back(llvm::ConstantInt::get(CGF.Int32Ty, 0)); + for (unsigned I = 0; I < NumElts - 1; ++I) { + // We need to copy the element itself, without the padding. 
+ CurLoadIndices.push_back(llvm::ConstantInt::get(CGF.SizeTy, I)); + emitCopyAtIndices(ElementTy, llvm::ConstantInt::get(CGF.SizeTy, I), + llvm::ConstantInt::get(CGF.Int32Ty, 0)); + CurLoadIndices.pop_back(); + } + CurLoadIndices.pop_back(); + + // Now copy the last element. + emitCopyAtIndices(ElementTy, + llvm::ConstantInt::get(CGF.SizeTy, NumElts - 1), + llvm::ConstantInt::get(CGF.Int32Ty, 1)); + + return true; + } + + bool processStruct(llvm::Type *FieldTy) { + auto *ST = dyn_cast(FieldTy); + if (!ST) + return false; + + unsigned Skipped = 0; + for (unsigned I = 0, E = ST->getNumElements(); I < E; ++I) { + llvm::Type *ElementTy = ST->getElementType(I); + if (CGF.CGM.getTargetCodeGenInfo().isHLSLPadding(ElementTy)) + ++Skipped; + else + emitCopyAtIndices(ElementTy, llvm::ConstantInt::get(CGF.Int32Ty, I), + llvm::ConstantInt::get(CGF.Int32Ty, I + Skipped)); + } + return true; + } + +public: + HLSLBufferCopyEmitter(CodeGenFunction &CGF, Address DestPtr, Address SrcPtr) + : CGF(CGF), DestPtr(DestPtr), SrcPtr(SrcPtr) {} + + bool emitCopy(QualType CType) { + LayoutTy = HLSLBufferLayoutBuilder(CGF.CGM).layOutType(CType); + + LLVM_DEBUG({ + dbgs() << "Emitting copy of "; + LayoutTy->print(dbgs()); + dbgs() << "\n"; + }); + + // If we don't have an aggregate, we can just fall back to normal memcpy. 
+ if (!LayoutTy->isAggregateType()) + return false; + + emitCopyAtIndices(LayoutTy, llvm::ConstantInt::get(CGF.SizeTy, 0), + llvm::ConstantInt::get(CGF.SizeTy, 0)); + return true; + } +}; +} // namespace + +bool CGHLSLRuntime::emitBufferCopy(CodeGenFunction &CGF, Address DestPtr, + Address SrcPtr, QualType CType) { + return HLSLBufferCopyEmitter(CGF, DestPtr, SrcPtr).emitCopy(CType); +} diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h index 103b4a98f6c26..97c7d0619950a 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.h +++ b/clang/lib/CodeGen/CGHLSLRuntime.h @@ -15,20 +15,19 @@ #ifndef LLVM_CLANG_LIB_CODEGEN_CGHLSLRUNTIME_H #define LLVM_CLANG_LIB_CODEGEN_CGHLSLRUNTIME_H -#include "llvm/ADT/DenseMap.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/IR/IntrinsicsDirectX.h" -#include "llvm/IR/IntrinsicsSPIRV.h" - +#include "Address.h" #include "clang/AST/Attr.h" #include "clang/AST/Decl.h" #include "clang/Basic/Builtins.h" #include "clang/Basic/HLSLRuntime.h" - +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Frontend/HLSL/HLSLResource.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsDirectX.h" +#include "llvm/IR/IntrinsicsSPIRV.h" #include #include @@ -187,16 +186,18 @@ class CGHLSLRuntime { llvm::Instruction *getConvergenceToken(llvm::BasicBlock &BB); - llvm::TargetExtType * - getHLSLBufferLayoutType(const RecordType *LayoutStructTy); + llvm::StructType *getHLSLBufferLayoutType(const RecordType *LayoutStructTy); void addHLSLBufferLayoutType(const RecordType *LayoutStructTy, - llvm::TargetExtType *LayoutTy); + llvm::StructType *LayoutTy); void emitInitListOpaqueValues(CodeGenFunction &CGF, InitListExpr *E); std::optional emitResourceArraySubscriptExpr(const ArraySubscriptExpr *E, CodeGenFunction &CGF); + bool emitBufferCopy(CodeGenFunction &CGF, Address DestPtr, Address SrcPtr, + QualType 
CType); + private: void emitBufferGlobalsAndMetadata(const HLSLBufferDecl *BufDecl, llvm::GlobalVariable *BufGV); @@ -204,7 +205,7 @@ class CGHLSLRuntime { llvm::GlobalVariable *GV); llvm::Triple::ArchType getArch(); - llvm::DenseMap LayoutTypes; + llvm::DenseMap LayoutTypes; }; } // namespace CodeGen diff --git a/clang/lib/CodeGen/HLSLBufferLayoutBuilder.cpp b/clang/lib/CodeGen/HLSLBufferLayoutBuilder.cpp index 838903cdcd1ee..99b5602b18bff 100644 --- a/clang/lib/CodeGen/HLSLBufferLayoutBuilder.cpp +++ b/clang/lib/CodeGen/HLSLBufferLayoutBuilder.cpp @@ -9,6 +9,7 @@ #include "HLSLBufferLayoutBuilder.h" #include "CGHLSLRuntime.h" #include "CodeGenModule.h" +#include "TargetInfo.h" #include "clang/AST/Type.h" #include @@ -19,71 +20,21 @@ using namespace clang; using namespace clang::CodeGen; -using llvm::hlsl::CBufferRowSizeInBytes; -namespace { - -// Creates a new array type with the same dimentions but with the new -// element type. -static llvm::Type * -createArrayWithNewElementType(CodeGenModule &CGM, - const ConstantArrayType *ArrayType, - llvm::Type *NewElemType) { - const clang::Type *ArrayElemType = ArrayType->getArrayElementTypeNoTypeQual(); - if (ArrayElemType->isConstantArrayType()) - NewElemType = createArrayWithNewElementType( - CGM, cast(ArrayElemType), NewElemType); - return llvm::ArrayType::get(NewElemType, ArrayType->getSExtSize()); -} - -// Returns the size of a scalar or vector in bytes -static unsigned getScalarOrVectorSizeInBytes(llvm::Type *Ty) { - assert(Ty->isVectorTy() || Ty->isIntegerTy() || Ty->isFloatingPointTy()); - if (Ty->isVectorTy()) { - llvm::FixedVectorType *FVT = cast(Ty); - return FVT->getNumElements() * - (FVT->getElementType()->getScalarSizeInBits() / 8); - } - return Ty->getScalarSizeInBits() / 8; -} - -} // namespace +static const CharUnits CBufferRowSize = + CharUnits::fromQuantity(llvm::hlsl::CBufferRowSizeInBytes); namespace clang { namespace CodeGen { -// Creates a layout type for given struct or class with HLSL constant 
buffer -// layout taking into account PackOffsets, if provided. -// Previously created layout types are cached by CGHLSLRuntime. -// -// The function iterates over all fields of the record type (including base -// classes) and calls layoutField to converts each field to its corresponding -// LLVM type and to calculate its HLSL constant buffer layout. Any embedded -// structs (or arrays of structs) are converted to target layout types as well. -// -// When PackOffsets are specified the elements will be placed based on the -// user-specified offsets. Not all elements must have a packoffset/register(c#) -// annotation though. For those that don't, the PackOffsets array will contain -// -1 value instead. These elements must be placed at the end of the layout -// after all of the elements with specific offset. -llvm::TargetExtType *HLSLBufferLayoutBuilder::createLayoutType( +llvm::StructType *HLSLBufferLayoutBuilder::layOutStruct( const RecordType *RT, const llvm::SmallVector *PackOffsets) { // check if we already have the layout type for this struct - if (llvm::TargetExtType *Ty = - CGM.getHLSLRuntime().getHLSLBufferLayoutType(RT)) + // TODO: Do we need to check for matching PackOffsets? 
+ if (llvm::StructType *Ty = CGM.getHLSLRuntime().getHLSLBufferLayoutType(RT)) return Ty; - SmallVector Layout; - SmallVector LayoutElements; - unsigned Index = 0; // packoffset index - unsigned EndOffset = 0; - - SmallVector> DelayLayoutFields; - - // reserve first spot in the layout vector for buffer size - Layout.push_back(0); - // iterate over all fields of the record, including fields on base classes llvm::SmallVector RecordDecls; RecordDecls.push_back(RT->castAsCXXRecordDecl()); @@ -94,179 +45,102 @@ llvm::TargetExtType *HLSLBufferLayoutBuilder::createLayoutType( RecordDecls.push_back(D->bases_begin()->getType()->castAsCXXRecordDecl()); } - unsigned FieldOffset; - llvm::Type *FieldType; + SmallVector Layout; + SmallVector DelayLayoutFields; + CharUnits CurrentOffset = CharUnits::Zero(); + auto LayOutField = [&](QualType FieldType) { + llvm::Type *LayoutType = layOutType(FieldType); + + const llvm::DataLayout &DL = CGM.getDataLayout(); + CharUnits Size = + CharUnits::fromQuantity(DL.getTypeSizeInBits(LayoutType) / 8); + CharUnits Align = CharUnits::fromQuantity(DL.getABITypeAlign(LayoutType)); + + if (LayoutType->isAggregateType() || + (CurrentOffset % CBufferRowSize) + Size > CBufferRowSize) + Align = Align.alignTo(CBufferRowSize); + + CharUnits NextOffset = CurrentOffset.alignTo(Align); + if (NextOffset > CurrentOffset) { + llvm::Type *Padding = CGM.getTargetCodeGenInfo().getHLSLPadding( + CGM, NextOffset - CurrentOffset); + Layout.emplace_back(Padding); + CurrentOffset = NextOffset; + } + Layout.emplace_back(LayoutType); + CurrentOffset += Size; + }; + unsigned PackOffsetIndex = 0; while (!RecordDecls.empty()) { const CXXRecordDecl *RD = RecordDecls.pop_back_val(); for (const auto *FD : RD->fields()) { - assert((!PackOffsets || Index < PackOffsets->size()) && + assert((!PackOffsets || PackOffsetIndex < PackOffsets->size()) && "number of elements in layout struct does not match number of " "packoffset annotations"); // No PackOffset info at all, or have a 
valid packoffset/register(c#) // annotations value -> layout the field. - const int PO = PackOffsets ? (*PackOffsets)[Index++] : -1; - if (!PackOffsets || PO != -1) { - if (!layoutField(FD, EndOffset, FieldOffset, FieldType, PO)) - return nullptr; - Layout.push_back(FieldOffset); - LayoutElements.push_back(FieldType); + const int PO = PackOffsets ? (*PackOffsets)[PackOffsetIndex++] : -1; + if (PO != -1) { + LayOutField(FD->getType()); continue; } // Have PackOffset info, but there is no packoffset/register(cX) // annotation on this field. Delay the layout until after all of the // other elements with packoffsets/register(cX) are processed. - DelayLayoutFields.emplace_back(FD, LayoutElements.size()); - // reserve space for this field in the layout vector and elements list - Layout.push_back(UINT_MAX); - LayoutElements.push_back(nullptr); + DelayLayoutFields.emplace_back(FD); } } // process delayed layouts - for (auto I : DelayLayoutFields) { - const FieldDecl *FD = I.first; - const unsigned IndexInLayoutElements = I.second; - // the first item in layout vector is size, so we need to offset the index - // by 1 - const unsigned IndexInLayout = IndexInLayoutElements + 1; - assert(Layout[IndexInLayout] == UINT_MAX && - LayoutElements[IndexInLayoutElements] == nullptr); - - if (!layoutField(FD, EndOffset, FieldOffset, FieldType)) - return nullptr; - Layout[IndexInLayout] = FieldOffset; - LayoutElements[IndexInLayoutElements] = FieldType; - } + for (const FieldDecl *FD : DelayLayoutFields) + LayOutField(FD->getType()); - // set the size of the buffer - Layout[0] = EndOffset; - - // create the layout struct type; anonymous struct have empty name but + // Create the layout struct type; anonymous structs have empty name but // non-empty qualified name const auto *Decl = RT->castAsCXXRecordDecl(); std::string Name = Decl->getName().empty() ? 
"anon" : Decl->getQualifiedNameAsString(); - llvm::StructType *StructTy = - llvm::StructType::create(LayoutElements, Name, true); - // create target layout type - llvm::TargetExtType *NewLayoutTy = llvm::TargetExtType::get( - CGM.getLLVMContext(), LayoutTypeName, {StructTy}, Layout); - if (NewLayoutTy) - CGM.getHLSLRuntime().addHLSLBufferLayoutType(RT, NewLayoutTy); - return NewLayoutTy; + llvm::StructType *NewTy = llvm::StructType::create(Layout, Name, + /*isPacked=*/true); + CGM.getHLSLRuntime().addHLSLBufferLayoutType(RT, NewTy); + return NewTy; } -// The function converts a single field of HLSL Buffer to its corresponding -// LLVM type and calculates it's layout. Any embedded structs (or -// arrays of structs) are converted to target layout types as well. -// The converted type is set to the FieldType parameter, the element -// offset is set to the FieldOffset parameter. The EndOffset (=size of the -// buffer) is also updated accordingly to the offset just after the placed -// element, unless the incoming EndOffset already larger (may happen in case -// of unsorted packoffset annotations). -// Returns true if the conversion was successful. -// The packoffset parameter contains the field's layout offset provided by the -// user or -1 if there was no packoffset (or register(cX)) annotation. -bool HLSLBufferLayoutBuilder::layoutField(const FieldDecl *FD, - unsigned &EndOffset, - unsigned &FieldOffset, - llvm::Type *&FieldType, - int Packoffset) { - - // Size of element; for arrays this is a size of a single element in the - // array. Total array size of calculated as (ArrayCount-1) * ArrayStride + - // ElemSize. 
- unsigned ElemSize = 0; - unsigned ElemOffset = 0; - unsigned ArrayCount = 1; - unsigned ArrayStride = 0; - - unsigned NextRowOffset = llvm::alignTo(EndOffset, CBufferRowSizeInBytes); - - llvm::Type *ElemLayoutTy = nullptr; - QualType FieldTy = FD->getType(); - - if (FieldTy->isConstantArrayType()) { - // Unwrap array to find the element type and get combined array size. - QualType Ty = FieldTy; - while (Ty->isConstantArrayType()) { - auto *ArrayTy = CGM.getContext().getAsConstantArrayType(Ty); - ArrayCount *= ArrayTy->getSExtSize(); - Ty = ArrayTy->getElementType(); - } - // For array of structures, create a new array with a layout type - // instead of the structure type. - if (Ty->isStructureOrClassType()) { - llvm::Type *NewTy = cast( - createLayoutType(Ty->getAsCanonical())); - if (!NewTy) - return false; - assert(isa(NewTy) && "expected target type"); - ElemSize = cast(NewTy)->getIntParameter(0); - ElemLayoutTy = createArrayWithNewElementType( - CGM, cast(FieldTy.getTypePtr()), NewTy); - } else { - // Array of vectors or scalars - ElemSize = - getScalarOrVectorSizeInBytes(CGM.getTypes().ConvertTypeForMem(Ty)); - ElemLayoutTy = CGM.getTypes().ConvertTypeForMem(FieldTy); - } - ArrayStride = llvm::alignTo(ElemSize, CBufferRowSizeInBytes); - ElemOffset = (Packoffset != -1) ? Packoffset : NextRowOffset; - - } else if (FieldTy->isStructureOrClassType()) { - // Create a layout type for the structure - ElemLayoutTy = createLayoutType( - cast(FieldTy->getAsCanonical())); - if (!ElemLayoutTy) - return false; - assert(isa(ElemLayoutTy) && "expected target type"); - ElemSize = cast(ElemLayoutTy)->getIntParameter(0); - ElemOffset = (Packoffset != -1) ? 
Packoffset : NextRowOffset; - - } else { - // scalar or vector - find element size and alignment - unsigned Align = 0; - ElemLayoutTy = CGM.getTypes().ConvertTypeForMem(FieldTy); - if (ElemLayoutTy->isVectorTy()) { - // align vectors by sub element size - const llvm::FixedVectorType *FVT = - cast(ElemLayoutTy); - unsigned SubElemSize = FVT->getElementType()->getScalarSizeInBits() / 8; - ElemSize = FVT->getNumElements() * SubElemSize; - Align = SubElemSize; - } else { - assert(ElemLayoutTy->isIntegerTy() || ElemLayoutTy->isFloatingPointTy()); - ElemSize = ElemLayoutTy->getScalarSizeInBits() / 8; - Align = ElemSize; - } +llvm::Type *HLSLBufferLayoutBuilder::layOutArray(const ConstantArrayType *AT) { + llvm::Type *EltTy = layOutType(AT->getElementType()); + uint64_t Count = AT->getZExtSize(); + + CharUnits EltSize = + CharUnits::fromQuantity(CGM.getDataLayout().getTypeSizeInBits(EltTy) / 8); + CharUnits Padding = EltSize.alignTo(CBufferRowSize) - EltSize; + + // If we don't have any padding between elements then we just need the array + // itself. 
+ if (Count < 2 || Padding.isZero()) + return llvm::ArrayType::get(EltTy, Count); + + llvm::LLVMContext &Context = CGM.getLLVMContext(); + llvm::Type *PaddingTy = + CGM.getTargetCodeGenInfo().getHLSLPadding(CGM, Padding); + auto *PaddedEltTy = + llvm::StructType::get(Context, {EltTy, PaddingTy}, /*isPacked=*/true); + return llvm::StructType::get( + Context, {llvm::ArrayType::get(PaddedEltTy, Count - 1), EltTy}, + /*IsPacked=*/true); +} - // calculate or get element offset for the vector or scalar - if (Packoffset != -1) { - ElemOffset = Packoffset; - } else { - ElemOffset = llvm::alignTo(EndOffset, Align); - // if the element does not fit, move it to the next row - if (ElemOffset + ElemSize > NextRowOffset) - ElemOffset = NextRowOffset; - } - } +llvm::Type *HLSLBufferLayoutBuilder::layOutType(QualType Ty) { + if (const auto *AT = CGM.getContext().getAsConstantArrayType(Ty)) + return layOutArray(AT); - // Update end offset of the layout; do not update it if the EndOffset - // is already bigger than the new value (which may happen with unordered - // packoffset annotations) - unsigned NewEndOffset = - ElemOffset + (ArrayCount - 1) * ArrayStride + ElemSize; - EndOffset = std::max(EndOffset, NewEndOffset); + if (Ty->isStructureOrClassType()) + return layOutStruct(Ty->getAsCanonical()); - // add the layout element and offset to the lists - FieldOffset = ElemOffset; - FieldType = ElemLayoutTy; - return true; + return CGM.getTypes().ConvertTypeForMem(Ty); } } // namespace CodeGen diff --git a/clang/lib/CodeGen/HLSLBufferLayoutBuilder.h b/clang/lib/CodeGen/HLSLBufferLayoutBuilder.h index 61240b280cfcb..0515b469f8b03 100644 --- a/clang/lib/CodeGen/HLSLBufferLayoutBuilder.h +++ b/clang/lib/CodeGen/HLSLBufferLayoutBuilder.h @@ -6,13 +6,11 @@ // //===----------------------------------------------------------------------===// +#include "clang/AST/TypeBase.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/DerivedTypes.h" namespace clang { -class RecordType; -class FieldDecl; 
- namespace CodeGen { class CodeGenModule; @@ -24,23 +22,36 @@ class CodeGenModule; class HLSLBufferLayoutBuilder { private: CodeGenModule &CGM; - llvm::StringRef LayoutTypeName; public: - HLSLBufferLayoutBuilder(CodeGenModule &CGM, llvm::StringRef LayoutTypeName) - : CGM(CGM), LayoutTypeName(LayoutTypeName) {} - - // Returns LLVM target extension type with the name LayoutTypeName - // for given structure type and layout data. The first number in - // the Layout is the size followed by offsets for each struct element. - llvm::TargetExtType * - createLayoutType(const RecordType *StructType, - const llvm::SmallVector *Packoffsets = nullptr); - -private: - bool layoutField(const clang::FieldDecl *FD, unsigned &EndOffset, - unsigned &FieldOffset, llvm::Type *&FieldType, - int Packoffset = -1); + HLSLBufferLayoutBuilder(CodeGenModule &CGM) : CGM(CGM) {} + + /// Lays out a struct type following HLSL buffer rules and considering + /// PackOffsets, if provided. Previously created layout structs are cached by + /// CGHLSLRuntime. + /// + /// The function iterates over all fields of the record type (including base + /// classes) and calls layoutField to converts each field to its corresponding + /// LLVM type and to calculate its HLSL constant buffer layout. Any embedded + /// structs (or arrays of structs) are converted to layout types as well. + /// + /// When PackOffsets are specified the elements will be placed based on the + /// user-specified offsets. Not all elements must have a + /// packoffset/register(c#) annotation though. For those that don't, the + /// PackOffsets array will contain -1 value instead. These elements must be + /// placed at the end of the layout after all of the elements with specific + /// offset. + llvm::StructType * + layOutStruct(const RecordType *StructType, + const llvm::SmallVector *Packoffsets = nullptr); + + /// Lays out an array type following HLSL buffer rules. 
+ llvm::Type * + layOutArray(const ConstantArrayType *AT); + + /// Lays out a type following HLSL buffer rules. Arrays and structures will be + /// padded appropriately and nested objects will be converted as appropriate. + llvm::Type *layOutType(QualType Type); }; } // namespace CodeGen diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h index d0edae1295094..8b59fde4b4120 100644 --- a/clang/lib/CodeGen/TargetInfo.h +++ b/clang/lib/CodeGen/TargetInfo.h @@ -448,6 +448,20 @@ class TargetCodeGenInfo { return nullptr; } + virtual llvm::Type * + getHLSLPadding(CodeGenModule &CGM, CharUnits NumBytes) const { + return llvm::ArrayType::get(llvm::Type::getInt8Ty(CGM.getLLVMContext()), + NumBytes.getQuantity()); + } + + virtual bool isHLSLPadding(llvm::Type *Ty) const { + // TODO: Do we actually want to default these functions like this? + if (auto *AT = dyn_cast(Ty)) + if (AT->getElementType() == llvm::Type::getInt8Ty(Ty->getContext())) + return true; + return false; + } + // Set the Branch Protection Attributes of the Function accordingly to the // BPI. Remove attributes that contradict with current BPI. 
static void diff --git a/clang/lib/CodeGen/Targets/DirectX.cpp b/clang/lib/CodeGen/Targets/DirectX.cpp index b4cebb9a32aca..6f6eb8beed5b9 100644 --- a/clang/lib/CodeGen/Targets/DirectX.cpp +++ b/clang/lib/CodeGen/Targets/DirectX.cpp @@ -32,6 +32,19 @@ class DirectXTargetCodeGenInfo : public TargetCodeGenInfo { llvm::Type * getHLSLType(CodeGenModule &CGM, const Type *T, const SmallVector *Packoffsets = nullptr) const override; + + llvm::Type *getHLSLPadding(CodeGenModule &CGM, + CharUnits NumBytes) const override { + unsigned Size = NumBytes.getQuantity(); + return llvm::TargetExtType::get(CGM.getLLVMContext(), "dx.Padding", {}, + {Size}); + } + + bool isHLSLPadding(llvm::Type *Ty) const override { + if (auto *TET = dyn_cast(Ty)) + return TET->getName() == "dx.Padding"; + return false; + } }; llvm::Type *DirectXTargetCodeGenInfo::getHLSLType( @@ -75,10 +88,9 @@ llvm::Type *DirectXTargetCodeGenInfo::getHLSLType( if (ContainedTy.isNull() || !ContainedTy->isStructureType()) return nullptr; - llvm::Type *BufferLayoutTy = - HLSLBufferLayoutBuilder(CGM, "dx.Layout") - .createLayoutType(ContainedTy->castAsCanonical(), - Packoffsets); + llvm::StructType *BufferLayoutTy = + HLSLBufferLayoutBuilder(CGM).layOutStruct( + ContainedTy->getAsCanonical(), Packoffsets); if (!BufferLayoutTy) return nullptr; diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp index 80e096ecf5ae9..a523cef801487 100644 --- a/clang/lib/CodeGen/Targets/SPIR.cpp +++ b/clang/lib/CodeGen/Targets/SPIR.cpp @@ -56,6 +56,20 @@ class CommonSPIRTargetCodeGenInfo : public TargetCodeGenInfo { llvm::Type * getHLSLType(CodeGenModule &CGM, const Type *Ty, const SmallVector *Packoffsets = nullptr) const override; + + llvm::Type * + getHLSLPadding(CodeGenModule &CGM, CharUnits NumBytes) const override { + unsigned Size = NumBytes.getQuantity(); + return llvm::TargetExtType::get(CGM.getLLVMContext(), "spirv.Padding", {}, + {Size}); + } + + bool isHLSLPadding(llvm::Type *Ty) const override 
{ + if (auto *TET = dyn_cast(Ty)) + return TET->getName() == "spirv.Padding"; + return false; + } + llvm::Type *getSPIRVImageTypeFromHLSLResource( const HLSLAttributedResourceType::Attributes &attributes, QualType SampledType, CodeGenModule &CGM) const; @@ -533,10 +547,9 @@ llvm::Type *CommonSPIRTargetCodeGenInfo::getHLSLType( if (ContainedTy.isNull() || !ContainedTy->isStructureType()) return nullptr; - llvm::Type *BufferLayoutTy = - HLSLBufferLayoutBuilder(CGM, "spirv.Layout") - .createLayoutType(ContainedTy->castAsCanonical(), - Packoffsets); + llvm::StructType *BufferLayoutTy = + HLSLBufferLayoutBuilder(CGM).layOutStruct( + ContainedTy->getAsCanonical(), Packoffsets); uint32_t StorageClass = /* Uniform storage class */ 2; return llvm::TargetExtType::get(Ctx, "spirv.VulkanBuffer", {BufferLayoutTy}, {StorageClass, false}); diff --git a/clang/test/CodeGenHLSL/ArrayAssignable.hlsl b/clang/test/CodeGenHLSL/ArrayAssignable.hlsl index aaa486eff10b7..0a71801a860d2 100644 --- a/clang/test/CodeGenHLSL/ArrayAssignable.hlsl +++ b/clang/test/CodeGenHLSL/ArrayAssignable.hlsl @@ -5,18 +5,19 @@ struct S { float f; }; -// CHECK: [[CBLayout:%.*]] = type <{ [2 x float], [2 x <4 x i32>], [2 x [2 x i32]], [1 x target("dx.Layout", %S, 8, 0, 4)] }> -// CHECK: @CBArrays.cb = global target("dx.CBuffer", target("dx.Layout", [[CBLayout]], 136, 0, 32, 64, 128)) -// CHECK: @c1 = external hidden addrspace(2) global [2 x float], align 4 +// CHECK: [[CBLayout:%.*]] = type <{ <{ [1 x <{ float, target("dx.Padding", 12) }>], float }>, target("dx.Padding", 12), [2 x <4 x i32>], <{ [1 x <{ <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }>, target("dx.Padding", 12) }>], <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }> }>, target("dx.Padding", 12), <{ [1 x <{ %S, target("dx.Padding", 8) }>], %S }> }> + +// CHECK: @CBArrays.cb = global target("dx.CBuffer", [[CBLayout]]) +// CHECK: @c1 = external hidden addrspace(2) global <{ [1 x <{ float, target("dx.Padding", 12) }>], float }>, align 4 // 
CHECK: @c2 = external hidden addrspace(2) global [2 x <4 x i32>], align 16 -// CHECK: @c3 = external hidden addrspace(2) global [2 x [2 x i32]], align 4 -// CHECK: @c4 = external hidden addrspace(2) global [1 x target("dx.Layout", %S, 8, 0, 4)], align 1 +// CHECK: @c3 = external hidden addrspace(2) global <{ [1 x <{ <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }>, target("dx.Padding", 12) }>], <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }> }>, align 4 +// CHECK: @c4 = external hidden addrspace(2) global <{ [1 x <{ %S, target("dx.Padding", 8) }>], %S }>, align 1 cbuffer CBArrays : register(b0) { float c1[2]; int4 c2[2]; int c3[2][2]; - S c4[1]; + S c4[2]; } // CHECK-LABEL: define hidden void {{.*}}arr_assign1 @@ -140,40 +141,71 @@ void arr_assign7() { // CHECK-LABEL: define hidden void {{.*}}arr_assign8 // CHECK: [[C:%.*]] = alloca [2 x float], align 4 -// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[C]], ptr align 4 {{.*}}, i32 8, i1 false) -// CHECK-NEXT: call void @llvm.memcpy.p0.p2.i32(ptr align 4 [[C]], ptr addrspace(2) align 4 @c1, i32 8, i1 false) +// CHECK-NEXT: [[V0:%.*]] = getelementptr inbounds [2 x float], ptr [[C]], i32 0 +// CHECK-NEXT: [[L0:%.*]] = load float, ptr addrspace(2) @c1, align 4 +// CHECK-NEXT: store float [[L0]], ptr [[V0]], align 4 +// CHECK-NEXT: [[V1:%.*]] = getelementptr inbounds [2 x float], ptr [[C]], i32 0, i32 1 +// CHECK-NEXT: [[L1:%.*]] = load float, ptr addrspace(2) getelementptr inbounds (<{ [1 x <{ float, target("dx.Padding", 12) }>], float }>, ptr addrspace(2) @c1, i32 0, i32 1), align 4 +// CHECK-NEXT: store float [[L1]], ptr [[V1]], align 4 // CHECK-NEXT: ret void void arr_assign8() { - float C[2] = {1.0, 2.0}; + float C[2]; C = c1; } +// TODO: A memcpy would actually be valid here, since everything is aligned on +// 16 byte boundaries. 
+// // CHECK-LABEL: define hidden void {{.*}}arr_assign9 // CHECK: [[C:%.*]] = alloca [2 x <4 x i32>], align 16 -// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[C]], ptr align 16 {{.*}}, i32 32, i1 false) -// CHECK-NEXT: call void @llvm.memcpy.p0.p2.i32(ptr align 16 [[C]], ptr addrspace(2) align 16 @c2, i32 32, i1 false) +// CHECK-NEXT: [[V0:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[C]], i32 0 +// CHECK-NEXT: [[L0:%.*]] = load <4 x i32>, ptr addrspace(2) @c2, align 16 +// CHECK-NEXT: store <4 x i32> [[L0]], ptr [[V0]], align 16 +// CHECK-NEXT: [[V1:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[C]], i32 0, i32 1 +// CHECK-NEXT: [[L1:%.*]] = load <4 x i32>, ptr addrspace(2) getelementptr inbounds ([2 x <4 x i32>], ptr addrspace(2) @c2, i32 0, i32 1), align 16 +// CHECK-NEXT: store <4 x i32> [[L1]], ptr [[V1]], align 16 // CHECK-NEXT: ret void void arr_assign9() { - int4 C[2] = {1,2,3,4,5,6,7,8}; + int4 C[2]; C = c2; } // CHECK-LABEL: define hidden void {{.*}}arr_assign10 // CHECK: [[C:%.*]] = alloca [2 x [2 x i32]], align 4 -// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[C]], ptr align 4 {{.*}}, i32 16, i1 false) -// CHECK-NEXT: call void @llvm.memcpy.p0.p2.i32(ptr align 4 [[C]], ptr addrspace(2) align 4 @c3, i32 16, i1 false) +// CHECK-NEXT: [[V0:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr [[C]], i32 0, i32 0 +// CHECK-NEXT: [[L0:%.*]] = load i32, ptr addrspace(2) @c3, align 4 +// CHECK-NEXT: store i32 [[L0]], ptr [[V0]], align 4 +// CHECK-NEXT: [[V1:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr [[C]], i32 0, i32 0, i32 1 +// CHECK-NEXT: [[L1:%.*]] = load i32, ptr addrspace(2) getelementptr inbounds (<{ [1 x <{ <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }>, target("dx.Padding", 12) }>], <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }> }>, ptr addrspace(2) @c3, i32 0, i32 0, i32 0, i32 1), align 4 +// CHECK-NEXT: store i32 [[L1]], ptr [[V1]], align 4 +// CHECK-NEXT: [[V2:%.*]] = getelementptr 
inbounds [2 x [2 x i32]], ptr [[C]], i32 0, i32 1, i32 0 +// CHECK-NEXT: [[L2:%.*]] = load i32, ptr addrspace(2) getelementptr inbounds (<{ [1 x <{ <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }>, target("dx.Padding", 12) }>], <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }> }>, ptr addrspace(2) @c3, i32 0, i32 1, i32 0, i32 0), align 4 +// CHECK-NEXT: store i32 [[L2]], ptr [[V2]], align 4 +// CHECK-NEXT: [[V3:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr [[C]], i32 0, i32 1, i32 1 +// CHECK-NEXT: [[L3:%.*]] = load i32, ptr addrspace(2) getelementptr inbounds (<{ [1 x <{ <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }>, target("dx.Padding", 12) }>], <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }> }>, ptr addrspace(2) @c3, i32 0, i32 1, i32 1), align 4 +// CHECK-NEXT: store i32 [[L3]], ptr [[V3]], align 4 // CHECK-NEXT: ret void void arr_assign10() { - int C[2][2] = {1,2,3,4}; + int C[2][2]; C = c3; } // CHECK-LABEL: define hidden void {{.*}}arr_assign11 -// CHECK: [[C:%.*]] = alloca [1 x %struct.S], align 1 -// CHECK: call void @llvm.memcpy.p0.p2.i32(ptr align 1 [[C]], ptr addrspace(2) align 1 @c4, i32 8, i1 false) +// CHECK: [[C:%.*]] = alloca [2 x %struct.S], align 1 +// CHECK-NEXT: [[V0:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[C]], i32 0, i32 0 +// CHECK-NEXT: [[L0:%.*]] = load i32, ptr addrspace(2) @c4, align 4 +// CHECK-NEXT: store i32 [[L0]], ptr [[V0]], align 4 +// CHECK-NEXT: [[V1:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[C]], i32 0, i32 0, i32 1 +// CHECK-NEXT: [[L1:%.*]] = load float, ptr addrspace(2) getelementptr inbounds (<{ [1 x <{ %S, target("dx.Padding", 8) }>], %S }>, ptr addrspace(2) @c4, i32 0, i32 0, i32 0, i32 1), align 4 +// CHECK-NEXT: store float [[L1]], ptr [[V1]], align 4 +// CHECK-NEXT: [[V2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[C]], i32 0, i32 1, i32 0 +// CHECK-NEXT: [[L2:%.*]] = load i32, ptr addrspace(2) getelementptr inbounds (<{ [1 x <{ %S, target("dx.Padding", 8) }>], 
%S }>, ptr addrspace(2) @c4, i32 0, i32 1, i32 0), align 4 +// CHECK-NEXT: store i32 [[L2]], ptr [[V2]], align 4 +// CHECK-NEXT: [[V3:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[C]], i32 0, i32 1, i32 1 +// CHECK-NEXT: [[L3:%.*]] = load float, ptr addrspace(2) getelementptr inbounds (<{ [1 x <{ %S, target("dx.Padding", 8) }>], %S }>, ptr addrspace(2) @c4, i32 0, i32 1, i32 1), align 4 +// CHECK-NEXT: store float [[L3]], ptr [[V3]], align 4 // CHECK-NEXT: ret void void arr_assign11() { - S s = {1, 2.0}; - S C[1] = {s}; + S C[2]; C = c4; } diff --git a/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl b/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl index b36682e065b3a..df82e8201ba55 100644 --- a/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl +++ b/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl @@ -37,9 +37,9 @@ void main(unsigned GI : SV_GroupIndex) {} // INLINE-NEXT: alloca // INLINE-NEXT: store i32 12 // INLINE-NEXT: store i32 13 -// INLINE-NEXT: %[[HANDLE:.*]] = call target("dx.CBuffer", target("dx.Layout", %"__cblayout_$Globals", 4, 0)) -// INLINE-NEXT-SAME: @"llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_$Globalss_4_0tt"(i32 0, i32 0, i32 1, i32 0, i1 false) -// INLINE-NEXT: store target("dx.CBuffer", target("dx.Layout", %"__cblayout_$Globals", 4, 0)) %[[HANDLE]], ptr @"$Globals.cb", align 4 +// INLINE-NEXT: %[[HANDLE:.*]] = call target("dx.CBuffer", %"__cblayout_$Globals") +// INLINE-NEXT-SAME: @"llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_$Globalsst"(i32 0, i32 0, i32 1, i32 0, i1 false, ptr @"$Globals.str") +// INLINE-NEXT: store target("dx.CBuffer", %"__cblayout_$Globals") %[[HANDLE]], ptr @"$Globals.cb", align 4 // INLINE-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group() // INLINE-NEXT: store i32 % // INLINE-NEXT: store i32 0 diff --git a/clang/test/CodeGenHLSL/resources/cbuffer.hlsl b/clang/test/CodeGenHLSL/resources/cbuffer.hlsl index 
8dcff5dad9d13..ab37ae0bc4e68 100644 --- a/clang/test/CodeGenHLSL/resources/cbuffer.hlsl +++ b/clang/test/CodeGenHLSL/resources/cbuffer.hlsl @@ -1,37 +1,121 @@ // RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-compute -fnative-half-type -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s -// CHECK: %__cblayout_CBScalars = type <{ float, double, half, i64, i32, i16, i32, i64 }> -// CHECK: %__cblayout_CBVectors = type <{ <3 x float>, <3 x double>, <2 x half>, <3 x i64>, <4 x i32>, <3 x i16>, <3 x i64> }> -// CHECK: %__cblayout_CBArrays = type <{ [3 x float], [2 x <3 x double>], [2 x [2 x half]], [3 x i64], [2 x [3 x [4 x <4 x i32>]]], [1 x i16], [2 x i64], [4 x i32] }> -// CHECK: %__cblayout_CBStructs = type <{ target("dx.Layout", %A, 8, 0), target("dx.Layout", %B, 14, 0, 8), -// CHECK-SAME: target("dx.Layout", %C, 24, 0, 16), [5 x target("dx.Layout", %A, 8, 0)], -// CHECK-SAME: target("dx.Layout", %__cblayout_D, 94, 0), half, <3 x i16> }> +// CHECK: %__cblayout_CBScalars = type <{ +// CHECK-SAME: float, target("dx.Padding", 4), double, +// CHECK-SAME: half, target("dx.Padding", 6), i64, +// CHECK-SAME: i32, i16, target("dx.Padding", 2), i32, target("dx.Padding", 4), +// CHECK-SAME: i64 +// CHECK-SAME: }> + +// CHECK: %__cblayout_CBVectors = type <{ +// CHECK-SAME: <3 x float>, target("dx.Padding", 4), +// CHECK-SAME: <3 x double>, <2 x half>, target("dx.Padding", 4), +// CHECK-SAME: <3 x i64>, target("dx.Padding", 8), +// CHECK-SAME: <4 x i32>, +// CHECK-SAME: <3 x i16>, target("dx.Padding", 10), +// CHECK-SAME: <3 x i64> +// CHECK-SAME: }> + +// CHECK: %__cblayout_CBArrays = type <{ +// CHECK-SAME: <{ [2 x <{ float, target("dx.Padding", 12) }>], float }>, target("dx.Padding", 12), +// CHECK-SAME: <{ [1 x <{ <3 x double>, target("dx.Padding", 8) }>], <3 x double> }>, target("dx.Padding", 8), +// CHECK-SAME: <{ [1 x <{ +// CHECK-SAME: <{ [1 x <{ half, target("dx.Padding", 14) }>], half }>, target("dx.Padding", 14) }>], +// CHECK-SAME: <{ [1 
x <{ half, target("dx.Padding", 14) }>], half }> +// CHECK-SAME: }>, target("dx.Padding", 14), +// CHECK-SAME: <{ [2 x <{ i64, target("dx.Padding", 8) }>], i64 }>, target("dx.Padding", 8), +// CHECK-SAME: [2 x [3 x [4 x <4 x i32>]]] +// CHECK-SAME: [1 x i16], target("dx.Padding", 14), +// CHECK-SAME: <{ [1 x <{ i64, target("dx.Padding", 8) }>], i64 }>, target("dx.Padding", 8), +// CHECK-SAME: <{ [3 x <{ i32, target("dx.Padding", 12) }>], i32 }> +// CHECK-SAME: }> + +// CHECK: %__cblayout_CBStructs = type <{ +// CHECK-SAME: %A, target("dx.Padding", 8), + +// TODO: We should have target("dx.Padding", 2) padding after %B, but we don't correctly handle +// 2- and 3-element vectors inside structs yet because of DataLayout rules. +// CHECK-SAME: %B, + +// CHECK-SAME: %C, target("dx.Padding", 8), +// CHECK-SAME: <{ [4 x <{ %A, target("dx.Padding", 8) }>], %A }>, target("dx.Padding", 8), +// CHECK-SAME: %__cblayout_D, half, +// CHECK-SAME: <3 x i16> +// CHECK-SAME: }> // CHECK: %A = type <{ <2 x float> }> // CHECK: %B = type <{ <2 x float>, <3 x i16> }> -// CHECK: %C = type <{ i32, target("dx.Layout", %A, 8, 0) }> -// CHECK: %__cblayout_D = type <{ [2 x [3 x target("dx.Layout", %B, 14, 0, 8)]] }> +// CHECK: %C = type <{ i32, target("dx.Padding", 12), %A }> + +// CHECK: %__cblayout_D = type <{ +// CHECK-SAME: <{ [1 x <{ +// CHECK-SAME: <{ [2 x <{ %B, target("dx.Padding", 2) }>], %B }>, target("dx.Padding", 2) +// CHECK-SAME: }>], +// CHECK-SAME: <{ [2 x <{ %B, target("dx.Padding", 2) }>], %B }> }> +// CHECK-SAME: }> + +// CHECK: %__cblayout_CBClasses = type <{ +// CHECK-SAME: %K, target("dx.Padding", 12), +// CHECK-SAME: %L, target("dx.Padding", 8), +// CHECK-SAME: %M, target("dx.Padding", 12), +// CHECK-SAME: <{ [9 x <{ %K, target("dx.Padding", 12) }>], %K }> +// CHECK-SAME: }> -// CHECK: %__cblayout_CBClasses = type <{ target("dx.Layout", %K, 4, 0), target("dx.Layout", %L, 8, 0, 4), -// CHECK-SAME: target("dx.Layout", %M, 68, 0), [10 x target("dx.Layout", %K, 4, 0)] }> // 
CHECK: %K = type <{ float }> // CHECK: %L = type <{ float, float }> -// CHECK: %M = type <{ [5 x target("dx.Layout", %K, 4, 0)] }> - -// CHECK: %__cblayout_CBMix = type <{ [2 x target("dx.Layout", %Test, 8, 0, 4)], float, [3 x [2 x <2 x float>]], float, -// CHECK-SAME: target("dx.Layout", %anon, 4, 0), double, target("dx.Layout", %anon.0, 8, 0), float, <1 x double>, i16 }> +// CHECK: %M = type <{ <{ [4 x <{ %K, target("dx.Padding", 12) }>], %K }> }> + +// CHECK: %__cblayout_CBMix = type <{ +// CHECK-SAME: <{ [1 x <{ %Test, target("dx.Padding", 8) }>], %Test }>, float, target("dx.Padding", 4) +// CHECK-SAME: <{ [2 x <{ +// CHECK-SAME: <{ [1 x <{ <2 x float>, target("dx.Padding", 8) }>], <2 x float> }>, target("dx.Padding", 8) }>], +// CHECK-SAME: <{ [1 x <{ <2 x float>, target("dx.Padding", 8) }>], <2 x float> }> +// CHECK-SAME: }>, float, target("dx.Padding", 4), +// CHECK-SAME: %anon, target("dx.Padding", 4), double, +// CHECK-SAME: %anon.0, float, target("dx.Padding", 4), +// CHECK-SAME: <1 x double>, i16 +// CHECK-SAME: }> // CHECK: %Test = type <{ float, float }> // CHECK: %anon = type <{ float }> // CHECK: %anon.0 = type <{ <2 x i32> }> -// CHECK: %__cblayout_CB_A = type <{ [2 x double], [3 x <3 x float>], float, [3 x double], half, [1 x <2 x double>], float, [2 x <3 x half>], <3 x half> }> -// CHECK: %__cblayout_CB_B = type <{ [3 x <3 x double>], <3 x half> }> -// CHECK: %__cblayout_CB_C = type <{ i32, target("dx.Layout", %F, 96, 0, 16, 28, 32, 56, 64, 80, 84, 90), half, target("dx.Layout", %G, 258, 0, 48, 64, 256), double }> - -// CHECK: %F = type <{ double, <3 x float>, float, <3 x double>, half, <2 x double>, float, <3 x half>, <3 x half> }> -// CHECK: %G = type <{ target("dx.Layout", %E, 36, 0, 8, 16, 20, 22, 24, 32), [1 x float], [2 x target("dx.Layout", %F, 96, 0, 16, 28, 32, 56, 64, 80, 84, 90)], half }> -// CHECK: %E = type <{ float, double, float, half, i16, i64, i32 }> +// CHECK: %__cblayout_CB_A = type <{ +// CHECK-SAME: <{ [1 x <{ double, 
target("dx.Padding", 8) }>], double }>, target("dx.Padding", 8), +// CHECK-SAME: <{ [2 x <{ <3 x float>, target("dx.Padding", 4) }>], <3 x float> }>, float, +// CHECK-SAME: <{ [2 x <{ double, target("dx.Padding", 8) }>], double }>, half, target("dx.Padding", 6), +// CHECK-SAME: [1 x <2 x double>], +// CHECK-SAME: float, target("dx.Padding", 12), +// CHECK-SAME: <{ [1 x <{ <3 x half>, target("dx.Padding", 10) }>], <3 x half> }>, <3 x half> +// CHECK-SAME: }> + +// CHECK: %__cblayout_CB_B = type <{ +// CHECK-SAME: <{ [2 x <{ <3 x double>, target("dx.Padding", 8) }>], <3 x double> }>, <3 x half> +// CHECK-SAME: }> + +// CHECK: %__cblayout_CB_C = type <{ +// CHECK-SAME: i32, target("dx.Padding", 12), +// CHECK-SAME: %F, +// CHECK-SAME: half, target("dx.Padding", 14), +// CHECK-SAME: %G, target("dx.Padding", 6), double +// CHECK-SAME: }> + +// CHECK: %F = type <{ +// CHECK-SAME: double, target("dx.Padding", 8), +// CHECK-SAME: <3 x float>, float, +// CHECK-SAME: <3 x double>, half, target("dx.Padding", 6), +// CHECK-SAME: <2 x double>, +// CHECK-SAME: float, <3 x half>, <3 x half> +// CHECK-SAME: }> + +// CHECK: %G = type <{ +// CHECK-SAME: %E, target("dx.Padding", 12), +// CHECK-SAME: [1 x float], target("dx.Padding", 12), +// CHECK-SAME: [2 x %F], +// CHECK-SAME: half +// CHECK-SAME: }> + +// CHECK: %E = type <{ float, target("dx.Padding", 4), double, float, half, i16, i64, i32 }> cbuffer CBScalars : register(b1, space5) { float a1; @@ -44,8 +128,7 @@ cbuffer CBScalars : register(b1, space5) { int64_t a8; } -// CHECK: @CBScalars.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CBScalars, -// CHECK-SAME: 56, 0, 8, 16, 24, 32, 36, 40, 48)) +// CHECK: @CBScalars.cb = global target("dx.CBuffer", %__cblayout_CBScalars) // CHECK: @a1 = external hidden addrspace(2) global float, align 4 // CHECK: @a2 = external hidden addrspace(2) global double, align 8 // CHECK: @a3 = external hidden addrspace(2) global half, align 2 @@ -67,8 +150,7 @@ cbuffer CBVectors { // 
FIXME: add a bool vectors after llvm-project/llvm#91639 is added } -// CHECK: @CBVectors.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CBVectors, -// CHECK-SAME: 136, 0, 16, 40, 48, 80, 96, 112)) +// CHECK: @CBVectors.cb = global target("dx.CBuffer", %__cblayout_CBVectors) // CHECK: @b1 = external hidden addrspace(2) global <3 x float>, align 16 // CHECK: @b2 = external hidden addrspace(2) global <3 x double>, align 32 // CHECK: @b3 = external hidden addrspace(2) global <2 x half>, align 4 @@ -89,16 +171,15 @@ cbuffer CBArrays : register(b2) { bool c8[4]; } -// CHECK: @CBArrays.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CBArrays, -// CHECK-SAME: 708, 0, 48, 112, 176, 224, 608, 624, 656)) -// CHECK: @c1 = external hidden addrspace(2) global [3 x float], align 4 -// CHECK: @c2 = external hidden addrspace(2) global [2 x <3 x double>], align 32 -// CHECK: @c3 = external hidden addrspace(2) global [2 x [2 x half]], align 2 -// CHECK: @c4 = external hidden addrspace(2) global [3 x i64], align 8 +// CHECK: @CBArrays.cb = global target("dx.CBuffer", %__cblayout_CBArrays) +// CHECK: @c1 = external hidden addrspace(2) global <{ [2 x <{ float, target("dx.Padding", 12) }>], float }>, align 4 +// CHECK: @c2 = external hidden addrspace(2) global <{ [1 x <{ <3 x double>, target("dx.Padding", 8) }>], <3 x double> }>, align 32 +// CHECK: @c3 = external hidden addrspace(2) global <{ [1 x <{ <{ [1 x <{ half, target("dx.Padding", 14) }>], half }>, target("dx.Padding", 14) }>], <{ [1 x <{ half, target("dx.Padding", 14) }>], half }> }>, align 2 +// CHECK: @c4 = external hidden addrspace(2) global <{ [2 x <{ i64, target("dx.Padding", 8) }>], i64 }>, align 8 // CHECK: @c5 = external hidden addrspace(2) global [2 x [3 x [4 x <4 x i32>]]], align 16 // CHECK: @c6 = external hidden addrspace(2) global [1 x i16], align 2 -// CHECK: @c7 = external hidden addrspace(2) global [2 x i64], align 8 -// CHECK: @c8 = external hidden addrspace(2) global [4 x 
i32], align 4 +// CHECK: @c7 = external hidden addrspace(2) global <{ [1 x <{ i64, target("dx.Padding", 8) }>], i64 }>, align 8 +// CHECK: @c8 = external hidden addrspace(2) global <{ [3 x <{ i32, target("dx.Padding", 12) }>], i32 }>, align 4 // CHECK: @CBArrays.str = private unnamed_addr constant [9 x i8] c"CBArrays\00", align 1 typedef uint32_t4 uint32_t8[2]; @@ -110,8 +191,7 @@ cbuffer CBTypedefArray : register(space2) { T2 t2[2]; } -// CHECK: @CBTypedefArray.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CBTypedefArray, -// CHECK-SAME: 128, 0, 64)) +// CHECK: @CBTypedefArray.cb = global target("dx.CBuffer", %__cblayout_CBTypedefArray) // CHECK: @t1 = external hidden addrspace(2) global [2 x [2 x <4 x i32>]], align 16 // CHECK: @t2 = external hidden addrspace(2) global [2 x [2 x <4 x i32>]], align 16 // CHECK: @CBTypedefArray.str = private unnamed_addr constant [15 x i8] c"CBTypedefArray\00", align 1 @@ -135,13 +215,12 @@ struct D { Empty es; }; -// CHECK: @CBStructs.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CBStructs, -// CHECK-SAME: 246, 0, 16, 32, 64, 144, 238, 240)) -// CHECK: @a = external hidden addrspace(2) global target("dx.Layout", %A, 8, 0), align 1 -// CHECK: @b = external hidden addrspace(2) global target("dx.Layout", %B, 14, 0, 8), align 1 -// CHECK: @c = external hidden addrspace(2) global target("dx.Layout", %C, 24, 0, 16), align 1 -// CHECK: @array_of_A = external hidden addrspace(2) global [5 x target("dx.Layout", %A, 8, 0)], align 1 -// CHECK: @d = external hidden addrspace(2) global target("dx.Layout", %__cblayout_D, 94, 0), align 1 +// CHECK: @CBStructs.cb = global target("dx.CBuffer", %__cblayout_CBStructs) +// CHECK: @a = external hidden addrspace(2) global %A, align 1 +// CHECK: @b = external hidden addrspace(2) global %B, align 1 +// CHECK: @c = external hidden addrspace(2) global %C, align 1 +// CHECK: @array_of_A = external hidden addrspace(2) global <{ [4 x <{ %A, target("dx.Padding", 8) }>], %A 
}>, align 1 +// CHECK: @d = external hidden addrspace(2) global %__cblayout_D, align 1 // CHECK: @e = external hidden addrspace(2) global half, align 2 // CHECK: @f = external hidden addrspace(2) global <3 x i16>, align 8 // CHECK: @CBStructs.str = private unnamed_addr constant [10 x i8] c"CBStructs\00", align 1 @@ -176,27 +255,25 @@ cbuffer CBClasses { K ka[10]; }; -// CHECK: @CBClasses.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CBClasses, -// CHECK-SAME: 260, 0, 16, 32, 112)) -// CHECK: @k = external hidden addrspace(2) global target("dx.Layout", %K, 4, 0), align 1 -// CHECK: @l = external hidden addrspace(2) global target("dx.Layout", %L, 8, 0, 4), align 1 -// CHECK: @m = external hidden addrspace(2) global target("dx.Layout", %M, 68, 0), align 1 -// CHECK: @ka = external hidden addrspace(2) global [10 x target("dx.Layout", %K, 4, 0)], align 1 +// CHECK: @CBClasses.cb = global target("dx.CBuffer", %__cblayout_CBClasses) +// CHECK: @k = external hidden addrspace(2) global %K, align 1 +// CHECK: @l = external hidden addrspace(2) global %L, align 1 +// CHECK: @m = external hidden addrspace(2) global %M, align 1 +// CHECK: @ka = external hidden addrspace(2) global <{ [9 x <{ %K, target("dx.Padding", 12) }>], %K }>, align 1 // CHECK: @CBClasses.str = private unnamed_addr constant [10 x i8] c"CBClasses\00", align 1 struct Test { float a, b; }; -// CHECK: @CBMix.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CBMix, -// CHECK-SAME: 170, 0, 24, 32, 120, 128, 136, 144, 152, 160, 168)) -// CHECK: @test = external hidden addrspace(2) global [2 x target("dx.Layout", %Test, 8, 0, 4)], align 1 +// CHECK: @CBMix.cb = global target("dx.CBuffer", %__cblayout_CBMix) +// CHECK: @test = external hidden addrspace(2) global <{ [1 x <{ %Test, target("dx.Padding", 8) }>], %Test }>, align 1 // CHECK: @f1 = external hidden addrspace(2) global float, align 4 -// CHECK: @f2 = external hidden addrspace(2) global [3 x [2 x <2 x float>]], align 8 +// 
CHECK: @f2 = external hidden addrspace(2) global <{ [2 x <{ <{ [1 x <{ <2 x float>, target("dx.Padding", 8) }>], <2 x float> }>, target("dx.Padding", 8) }>], <{ [1 x <{ <2 x float>, target("dx.Padding", 8) }>], <2 x float> }> }>, align 8 // CHECK: @f3 = external hidden addrspace(2) global float, align 4 -// CHECK: @f4 = external hidden addrspace(2) global target("dx.Layout", %anon, 4, 0), align 1 +// CHECK: @f4 = external hidden addrspace(2) global %anon, align 1 // CHECK: @f5 = external hidden addrspace(2) global double, align 8 -// CHECK: @f6 = external hidden addrspace(2) global target("dx.Layout", %anon.0, 8, 0), align 1 +// CHECK: @f6 = external hidden addrspace(2) global %anon.0, align 1 // CHECK: @f7 = external hidden addrspace(2) global float, align 4 // CHECK: @f8 = external hidden addrspace(2) global <1 x double>, align 8 // CHECK: @f9 = external hidden addrspace(2) global i16, align 2 @@ -215,7 +292,7 @@ cbuffer CBMix { uint16_t f9; }; -// CHECK: @CB_A.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_A, 188, 0, 32, 76, 80, 120, 128, 144, 160, 182)) +// CHECK: @CB_A.cb = global target("dx.CBuffer", %__cblayout_CB_A) cbuffer CB_A { double B0[2]; @@ -229,7 +306,7 @@ cbuffer CB_A { half3 B8; } -// CHECK: @CB_B.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_B, 94, 0, 88)) +// CHECK: @CB_B.cb = global target("dx.CBuffer", %__cblayout_CB_B) cbuffer CB_B { double3 B9[3]; half3 B10; @@ -264,7 +341,7 @@ struct G { half C3; }; -// CHECK: @CB_C.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_C, 400, 0, 16, 112, 128, 392)) +// CHECK: @CB_C.cb = global target("dx.CBuffer", %__cblayout_CB_C) cbuffer CB_C { int D0; F D1; @@ -275,63 +352,63 @@ cbuffer CB_C { // CHECK: define internal void @_init_buffer_CBScalars.cb() // CHECK-NEXT: entry: -// CHECK-NEXT: %CBScalars.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CBScalars, 56, 0, 8, 16, 24, 32, 36, 40, 48)) -// CHECK-SAME: 
@llvm.dx.resource.handlefrombinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBScalarss_56_0_8_16_24_32_36_40_48tt(i32 5, i32 1, i32 1, i32 0, ptr @CBScalars.str) -// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CBScalars, 56, 0, 8, 16, 24, 32, 36, 40, 48)) %CBScalars.cb_h, ptr @CBScalars.cb, align 4 +// CHECK-NEXT: %CBScalars.cb_h = call target("dx.CBuffer", %__cblayout_CBScalars) +// CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.CBuffer_s___cblayout_CBScalarsst(i32 5, i32 1, i32 1, i32 0, ptr @CBScalars.str) +// CHECK-NEXT: store target("dx.CBuffer", %__cblayout_CBScalars) %CBScalars.cb_h, ptr @CBScalars.cb, align 4 // CHECK: define internal void @_init_buffer_CBVectors.cb() // CHECK-NEXT: entry: -// CHECK-NEXT: %CBVectors.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CBVectors, 136, 0, 16, 40, 48, 80, 96, 112)) -// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBVectorss_136_0_16_40_48_80_96_112tt(i32 0, i32 0, i32 1, i32 0, ptr @CBVectors.str) -// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CBVectors, 136, 0, 16, 40, 48, 80, 96, 112)) %CBVectors.cb_h, ptr @CBVectors.cb, align 4 +// CHECK-NEXT: %CBVectors.cb_h = call target("dx.CBuffer", %__cblayout_CBVectors) +// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_CBVectorsst(i32 0, i32 0, i32 1, i32 0, ptr @CBVectors.str) +// CHECK-NEXT: store target("dx.CBuffer", %__cblayout_CBVectors) %CBVectors.cb_h, ptr @CBVectors.cb, align 4 // CHECK: define internal void @_init_buffer_CBArrays.cb() // CHECK-NEXT: entry: -// CHECK-NEXT: %CBArrays.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CBArrays, 708, 0, 48, 112, 176, 224, 608, 624, 656)) -// CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBArrayss_708_0_48_112_176_224_608_624_656tt(i32 0, i32 2, i32 1, i32 0, ptr @CBArrays.str) -// CHECK-NEXT: store 
target("dx.CBuffer", target("dx.Layout", %__cblayout_CBArrays, 708, 0, 48, 112, 176, 224, 608, 624, 656)) %CBArrays.cb_h, ptr @CBArrays.cb, align 4 +// CHECK-NEXT: %CBArrays.cb_h = call target("dx.CBuffer", %__cblayout_CBArrays) +// CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.CBuffer_s___cblayout_CBArraysst(i32 0, i32 2, i32 1, i32 0, ptr @CBArrays.str) +// CHECK-NEXT: store target("dx.CBuffer", %__cblayout_CBArrays) %CBArrays.cb_h, ptr @CBArrays.cb, align 4 // CHECK: define internal void @_init_buffer_CBTypedefArray.cb() // CHECK-NEXT: entry: -// CHECK-NEXT: %CBTypedefArray.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CBTypedefArray, 128, 0, 64)) -// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBTypedefArrays_128_0_64tt(i32 1, i32 2, i32 1, i32 0, ptr @CBTypedefArray.str) -// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CBTypedefArray, 128, 0, 64)) %CBTypedefArray.cb_h, ptr @CBTypedefArray.cb, align 4 +// CHECK-NEXT: %CBTypedefArray.cb_h = call target("dx.CBuffer", %__cblayout_CBTypedefArray) +// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_CBTypedefArrayst(i32 1, i32 2, i32 1, i32 0, ptr @CBTypedefArray.str) +// CHECK-NEXT: store target("dx.CBuffer", %__cblayout_CBTypedefArray) %CBTypedefArray.cb_h, ptr @CBTypedefArray.cb, align 4 // CHECK: define internal void @_init_buffer_CBStructs.cb() // CHECK-NEXT: entry: -// CHECK-NEXT: %CBStructs.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CBStructs, 246, 0, 16, 32, 64, 144, 238, 240)) -// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBStructss_246_0_16_32_64_144_238_240tt(i32 2, i32 0, i32 1, i32 0, ptr @CBStructs.str) -// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CBStructs, 246, 0, 16, 32, 64, 144, 238, 240)) %CBStructs.cb_h, ptr @CBStructs.cb, align 4 +// CHECK-NEXT: %CBStructs.cb_h = 
call target("dx.CBuffer", %__cblayout_CBStructs) +// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_CBStructsst(i32 2, i32 0, i32 1, i32 0, ptr @CBStructs.str) +// CHECK-NEXT: store target("dx.CBuffer", %__cblayout_CBStructs) %CBStructs.cb_h, ptr @CBStructs.cb, align 4 // CHECK: define internal void @_init_buffer_CBClasses.cb() // CHECK-NEXT: entry: -// CHECK-NEXT: %CBClasses.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CBClasses, 260, 0, 16, 32, 112)) -// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBClassess_260_0_16_32_112tt(i32 3, i32 0, i32 1, i32 0, ptr @CBClasses.str) -// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CBClasses, 260, 0, 16, 32, 112)) %CBClasses.cb_h, ptr @CBClasses.cb, align 4 +// CHECK-NEXT: %CBClasses.cb_h = call target("dx.CBuffer", %__cblayout_CBClasses) +// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_CBClassesst(i32 3, i32 0, i32 1, i32 0, ptr @CBClasses.str) +// CHECK-NEXT: store target("dx.CBuffer", %__cblayout_CBClasses) %CBClasses.cb_h, ptr @CBClasses.cb, align 4 // CHECK: define internal void @_init_buffer_CBMix.cb() // CHECK-NEXT: entry: -// CHECK-NEXT: %CBMix.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CBMix, 170, 0, 24, 32, 120, 128, 136, 144, 152, 160, 168)) -// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBMixs_170_0_24_32_120_128_136_144_152_160_168tt(i32 4, i32 0, i32 1, i32 0, ptr @CBMix.str) -// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CBMix, 170, 0, 24, 32, 120, 128, 136, 144, 152, 160, 168)) %CBMix.cb_h, ptr @CBMix.cb, align 4 +// CHECK-NEXT: %CBMix.cb_h = call target("dx.CBuffer", %__cblayout_CBMix) +// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_CBMixst(i32 4, i32 0, i32 1, i32 0, ptr @CBMix.str) +// CHECK-NEXT: store 
target("dx.CBuffer", %__cblayout_CBMix) %CBMix.cb_h, ptr @CBMix.cb, align 4 // CHECK: define internal void @_init_buffer_CB_A.cb() // CHECK-NEXT: entry: -// CHECK-NEXT: %CB_A.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_A, 188, 0, 32, 76, 80, 120, 128, 144, 160, 182)) -// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_CB_As_188_0_32_76_80_120_128_144_160_182tt(i32 5, i32 0, i32 1, i32 0, ptr @CB_A.str) -// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_A, 188, 0, 32, 76, 80, 120, 128, 144, 160, 182)) %CB_A.cb_h, ptr @CB_A.cb, align 4 +// CHECK-NEXT: %CB_A.cb_h = call target("dx.CBuffer", %__cblayout_CB_A) +// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_CB_Ast(i32 5, i32 0, i32 1, i32 0, ptr @CB_A.str) +// CHECK-NEXT: store target("dx.CBuffer", %__cblayout_CB_A) %CB_A.cb_h, ptr @CB_A.cb, align 4 // CHECK: define internal void @_init_buffer_CB_B.cb() // CHECK-NEXT: entry: -// CHECK-NEXT: %CB_B.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_B, 94, 0, 88)) -// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_CB_Bs_94_0_88tt(i32 6, i32 0, i32 1, i32 0, ptr @CB_B.str) -// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_B, 94, 0, 88)) %CB_B.cb_h, ptr @CB_B.cb, align 4 +// CHECK-NEXT: %CB_B.cb_h = call target("dx.CBuffer", %__cblayout_CB_B) +// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_CB_Bst(i32 6, i32 0, i32 1, i32 0, ptr @CB_B.str) +// CHECK-NEXT: store target("dx.CBuffer", %__cblayout_CB_B) %CB_B.cb_h, ptr @CB_B.cb, align 4 // CHECK: define internal void @_init_buffer_CB_C.cb() // CHECK-NEXT: entry: -// CHECK-NEXT: %CB_C.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_C, 400, 0, 16, 112, 128, 392)) -// CHECK-SAME: 
@llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_CB_Cs_400_0_16_112_128_392tt(i32 7, i32 0, i32 1, i32 0, ptr @CB_C.str) -// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_C, 400, 0, 16, 112, 128, 392)) %CB_C.cb_h, ptr @CB_C.cb, align 4 +// CHECK-NEXT: %CB_C.cb_h = call target("dx.CBuffer", %__cblayout_CB_C) +// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_CB_Cst(i32 7, i32 0, i32 1, i32 0, ptr @CB_C.str) +// CHECK-NEXT: store target("dx.CBuffer", %__cblayout_CB_C) %CB_C.cb_h, ptr @CB_C.cb, align 4 RWBuffer Buf; diff --git a/clang/test/CodeGenHLSL/resources/cbuffer_and_namespaces.hlsl b/clang/test/CodeGenHLSL/resources/cbuffer_and_namespaces.hlsl index b7bdce32e6507..1fe0a68261c94 100644 --- a/clang/test/CodeGenHLSL/resources/cbuffer_and_namespaces.hlsl +++ b/clang/test/CodeGenHLSL/resources/cbuffer_and_namespaces.hlsl @@ -4,18 +4,18 @@ // CHECK: %"n0::n1::__cblayout_A" = type <{ float }> // CHECK: %"n0::__cblayout_B" = type <{ float }> -// CHECK: %"n0::n2::__cblayout_C" = type <{ float, target("dx.Layout", %"n0::Foo", 4, 0) }> +// CHECK: %"n0::n2::__cblayout_C" = type <{ float, target("dx.Padding", 12), %"n0::Foo" }> // CHECK: %"n0::Foo" = type <{ float }> -// CHECK: @A.cb = global target("dx.CBuffer", target("dx.Layout", %"n0::n1::__cblayout_A", 4, 0)) +// CHECK: @A.cb = global target("dx.CBuffer", %"n0::n1::__cblayout_A") // CHECK: @_ZN2n02n11aE = external hidden addrspace(2) global float, align 4 -// CHECK: @B.cb = global target("dx.CBuffer", target("dx.Layout", %"n0::__cblayout_B", 4, 0)) +// CHECK: @B.cb = global target("dx.CBuffer", %"n0::__cblayout_B") // CHECK: @_ZN2n01aE = external hidden addrspace(2) global float, align 4 -// CHECK: @C.cb = global target("dx.CBuffer", target("dx.Layout", %"n0::n2::__cblayout_C", 20, 0, 16)) +// CHECK: @C.cb = global target("dx.CBuffer", %"n0::n2::__cblayout_C") // CHECK: @_ZN2n02n21aE = external hidden addrspace(2) global float, 
align 4 -// CHECK: external hidden addrspace(2) global target("dx.Layout", %"n0::Foo", 4, 0), align 1 +// CHECK: external hidden addrspace(2) global %"n0::Foo", align 1 namespace n0 { struct Foo { diff --git a/clang/test/CodeGenHLSL/resources/cbuffer_with_packoffset.hlsl b/clang/test/CodeGenHLSL/resources/cbuffer_with_packoffset.hlsl index 7bedd63c9f65d..c39fd75ec6ee4 100644 --- a/clang/test/CodeGenHLSL/resources/cbuffer_with_packoffset.hlsl +++ b/clang/test/CodeGenHLSL/resources/cbuffer_with_packoffset.hlsl @@ -2,10 +2,13 @@ // RUN: dxil-pc-shadermodel6.3-compute %s \ // RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s -// CHECK: %__cblayout_CB = type <{ float, double, <2 x i32> }> -// CHECK: %__cblayout_CB_1 = type <{ float, <2 x float> }> +// TODO: Reordering fields doesn't work... +// XFAIL: * -// CHECK: @CB.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 176, 16, 168, 88)) +// CHECK: %__cblayout_CB = type <{ [16 x i8], float, [68 x i8], <2 x i32>, [72 x i8], double }> +// CHECK: %__cblayout_CB_1 = type <{ [80 x i8], <2 x float>, float }> + +// CHECK: @CB.cb = global target("dx.CBuffer", %__cblayout_CB) // CHECK: @a = external hidden addrspace(2) global float, align 4 // CHECK: @b = external hidden addrspace(2) global double, align 8 // CHECK: @c = external hidden addrspace(2) global <2 x i32>, align 8 @@ -17,7 +20,7 @@ cbuffer CB : register(b1, space3) { int2 c : packoffset(c5.z); } -// CHECK: @CB.cb.1 = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_1, 92, 88, 80)) +// CHECK: @CB.cb.1 = global target("dx.CBuffer", %__cblayout_CB_1) // CHECK: @x = external hidden addrspace(2) global float, align 4 // CHECK: @y = external hidden addrspace(2) global <2 x float>, align 8 @@ -30,7 +33,7 @@ cbuffer CB : register(b0) { // CHECK: define internal void @_init_buffer_CB.cb() // CHECK-NEXT: entry: -// CHECK-NEXT: %CB.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 176, 16, 168, 88)) +// CHECK-NEXT: 
%CB.cb_h = call target("dx.CBuffer", %__cblayout_CB) // CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBs_176_16_168_88tt(i32 3, i32 1, i32 1, i32 0, ptr @CB.str) float foo() { diff --git a/clang/test/CodeGenHLSL/resources/cbuffer_with_static_global_and_function.hlsl b/clang/test/CodeGenHLSL/resources/cbuffer_with_static_global_and_function.hlsl index fa3405df9e3d3..b8c7babb8d634 100644 --- a/clang/test/CodeGenHLSL/resources/cbuffer_with_static_global_and_function.hlsl +++ b/clang/test/CodeGenHLSL/resources/cbuffer_with_static_global_and_function.hlsl @@ -2,7 +2,7 @@ // CHECK: %__cblayout_A = type <{ float }> -// CHECK: @A.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_A, 4, 0)) +// CHECK: @A.cb = global target("dx.CBuffer", %__cblayout_A) // CHECK: @a = external hidden addrspace(2) global float, align 4 // CHECK-DAG: @_ZL1b = internal global float 3.000000e+00, align 4 // CHECK-NOT: @B.cb diff --git a/clang/test/CodeGenHLSL/resources/default_cbuffer.hlsl b/clang/test/CodeGenHLSL/resources/default_cbuffer.hlsl index ad4d92f8afc02..5333dad962195 100644 --- a/clang/test/CodeGenHLSL/resources/default_cbuffer.hlsl +++ b/clang/test/CodeGenHLSL/resources/default_cbuffer.hlsl @@ -1,19 +1,18 @@ // RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-compute -fnative-half-type -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s --check-prefixes=CHECK,DXIL // RUN: %clang_cc1 -finclude-default-header -triple spirv-pc-vulkan1.3-compute -fnative-half-type -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s --check-prefixes=CHECK,SPIRV -// DXIL: %"__cblayout_$Globals" = type <{ float, float, target("dx.Layout", %__cblayout_S, 4, 0) }> -// SPIRV: %"__cblayout_$Globals" = type <{ float, float, target("spirv.Layout", %__cblayout_S, 4, 0) }> +// CHECK: %"__cblayout_$Globals" = type <{ float, float, target("{{.*}}.Padding", 8), %__cblayout_S }> // CHECK: %__cblayout_S = type <{ float }> -// DXIL-DAG: 
@"$Globals.cb" = global target("dx.CBuffer", target("dx.Layout", %"__cblayout_$Globals", 20, 0, 4, 16)) +// DXIL-DAG: @"$Globals.cb" = global target("dx.CBuffer", %"__cblayout_$Globals") // DXIL-DAG: @a = external hidden addrspace(2) global float // DXIL-DAG: @g = external hidden addrspace(2) global float -// DXIL-DAG: @h = external hidden addrspace(2) global target("dx.Layout", %__cblayout_S, 4, 0), align 4 +// DXIL-DAG: @h = external hidden addrspace(2) global %__cblayout_S, align 4 -// SPIRV-DAG: @"$Globals.cb" = global target("spirv.VulkanBuffer", target("spirv.Layout", %"__cblayout_$Globals", 20, 0, 4, 16), 2, 0) +// SPIRV-DAG: @"$Globals.cb" = global target("spirv.VulkanBuffer", %"__cblayout_$Globals", 2, 0) // SPIRV-DAG: @a = external hidden addrspace(12) global float // SPIRV-DAG: @g = external hidden addrspace(12) global float -// SPIRV-DAG: @h = external hidden addrspace(12) global target("spirv.Layout", %__cblayout_S, 4, 0), align 8 +// SPIRV-DAG: @h = external hidden addrspace(12) global %__cblayout_S, align 8 struct EmptyStruct { }; diff --git a/clang/test/CodeGenHLSL/resources/default_cbuffer_with_layout.hlsl b/clang/test/CodeGenHLSL/resources/default_cbuffer_with_layout.hlsl index 1b2cb0e99aa83..fee3b77c32dd2 100644 --- a/clang/test/CodeGenHLSL/resources/default_cbuffer_with_layout.hlsl +++ b/clang/test/CodeGenHLSL/resources/default_cbuffer_with_layout.hlsl @@ -13,6 +13,9 @@ // CHECK-DAG: @e = external hidden addrspace(2) global <4 x float>, align 16 // CHECK-DAG: @s = external hidden addrspace(2) global target("dx.Layout", %S, 8, 0), align 1 +// TODO: Reordering fields doesn't work... 
+// XFAIL: * + struct S { float2 v; }; diff --git a/llvm/docs/DirectX/DXILResources.rst b/llvm/docs/DirectX/DXILResources.rst index f253e02f4cdd9..db7d4a4342eb7 100644 --- a/llvm/docs/DirectX/DXILResources.rst +++ b/llvm/docs/DirectX/DXILResources.rst @@ -277,7 +277,7 @@ Examples: Accessing Resources as Memory ----------------------------- -*relevant types: Buffers and Textures* +*relevant types: Buffers, Textures, and CBuffers* Loading and storing from resources is generally represented in LLVM using operations on memory that is only accessible via a handle object. Given a @@ -302,14 +302,14 @@ stores are described later in this document. - - Pointer - A pointer to an object in the buffer - * - ``%buffer`` + * - ``%resource`` - 0 - - ``target(dx.TypedBuffer, ...)`` - - The buffer to access + - Any buffer, texture, or cbuffer type + - The resource to access * - ``%index`` - 1 - ``i32`` - - Index into the buffer + - Index into the resource Examples: diff --git a/llvm/include/llvm/Analysis/DXILResource.h b/llvm/include/llvm/Analysis/DXILResource.h index c7aff167324e6..bcbca78504041 100644 --- a/llvm/include/llvm/Analysis/DXILResource.h +++ b/llvm/include/llvm/Analysis/DXILResource.h @@ -222,27 +222,6 @@ class AnyResourceExtType : public TargetExtType { } }; -/// The dx.Layout target extension type -/// -/// `target("dx.Layout", , , [offsets...])` -class LayoutExtType : public TargetExtType { -public: - LayoutExtType() = delete; - LayoutExtType(const LayoutExtType &) = delete; - LayoutExtType &operator=(const LayoutExtType &) = delete; - - Type *getWrappedType() const { return getTypeParameter(0); } - uint32_t getSize() const { return getIntParameter(0); } - uint32_t getOffsetOfElement(int I) const { return getIntParameter(I + 1); } - - static bool classof(const TargetExtType *T) { - return T->getName() == "dx.Layout"; - } - static bool classof(const Type *T) { - return isa(T) && classof(cast(T)); - } -}; - /// The dx.Padding target extension type /// /// 
`target("dx.Padding", NumBytes)` diff --git a/llvm/include/llvm/Frontend/HLSL/CBuffer.h b/llvm/include/llvm/Frontend/HLSL/CBuffer.h index 694a7fa854576..f4e232ffe1745 100644 --- a/llvm/include/llvm/Frontend/HLSL/CBuffer.h +++ b/llvm/include/llvm/Frontend/HLSL/CBuffer.h @@ -46,7 +46,8 @@ class CBufferMetadata { CBufferMetadata(NamedMDNode *MD) : MD(MD) {} public: - static std::optional get(Module &M); + static std::optional + get(Module &M, llvm::function_ref IsPadding); using iterator = SmallVector::iterator; iterator begin() { return Mappings.begin(); } @@ -55,9 +56,6 @@ class CBufferMetadata { void eraseFromModule(); }; -APInt translateCBufArrayOffset(const DataLayout &DL, APInt Offset, - ArrayType *Ty); - } // namespace hlsl } // namespace llvm diff --git a/llvm/lib/Analysis/DXILResource.cpp b/llvm/lib/Analysis/DXILResource.cpp index f9bf09262dd1f..14d9925b4297a 100644 --- a/llvm/lib/Analysis/DXILResource.cpp +++ b/llvm/lib/Analysis/DXILResource.cpp @@ -255,6 +255,12 @@ static void formatTypeName(SmallString<64> &Dest, StringRef Name, if (!ContainedType) return; + SmallVector ArrayDimensions; + while (ArrayType *AT = dyn_cast(ContainedType)) { + ArrayDimensions.push_back(AT->getNumElements()); + ContainedType = AT->getElementType(); + } + StringRef ElementName; ElementType ET = toDXILElementType(ContainedType, IsSigned); if (ET != ElementType::Invalid) { @@ -271,6 +277,8 @@ static void formatTypeName(SmallString<64> &Dest, StringRef Name, DestStream << "<" << ElementName; if (const FixedVectorType *VTy = dyn_cast(ContainedType)) DestStream << VTy->getNumElements(); + for (uint64_t Dim : ArrayDimensions) + DestStream << "[" << Dim << "]"; DestStream << ">"; } @@ -372,13 +380,6 @@ StructType *ResourceTypeInfo::createElementStruct(StringRef CBufferName) { Name.append(CBufferName); } - // TODO: Remove this when we update the frontend to use explicit padding. 
- if (LayoutExtType *LayoutType = - dyn_cast(RTy->getResourceType())) { - StructType *Ty = cast(LayoutType->getWrappedType()); - return StructType::create(Ty->elements(), Name); - } - return getOrCreateElementStruct( getTypeWithoutPadding(RTy->getResourceType()), Name); } @@ -490,13 +491,7 @@ ResourceTypeInfo::UAVInfo ResourceTypeInfo::getUAV() const { uint32_t ResourceTypeInfo::getCBufferSize(const DataLayout &DL) const { assert(isCBuffer() && "Not a CBuffer"); - Type *ElTy = cast(HandleTy)->getResourceType(); - - // TODO: Remove this when we update the frontend to use explicit padding. - if (auto *LayoutTy = dyn_cast(ElTy)) - return LayoutTy->getSize(); - return DL.getTypeAllocSize(ElTy); } diff --git a/llvm/lib/Frontend/HLSL/CBuffer.cpp b/llvm/lib/Frontend/HLSL/CBuffer.cpp index 407b6ad6d5a7e..744bf9fcd32a7 100644 --- a/llvm/lib/Frontend/HLSL/CBuffer.cpp +++ b/llvm/lib/Frontend/HLSL/CBuffer.cpp @@ -15,25 +15,28 @@ using namespace llvm; using namespace llvm::hlsl; -static size_t getMemberOffset(GlobalVariable *Handle, size_t Index) { +static SmallVector +getMemberOffsets(const DataLayout &DL, GlobalVariable *Handle, + llvm::function_ref IsPadding) { + SmallVector Offsets; + auto *HandleTy = cast(Handle->getValueType()); assert((HandleTy->getName().ends_with(".CBuffer") || HandleTy->getName() == "spirv.VulkanBuffer") && "Not a cbuffer type"); assert(HandleTy->getNumTypeParameters() == 1 && "Expected layout type"); + auto *LayoutTy = cast(HandleTy->getTypeParameter(0)); - auto *LayoutTy = cast(HandleTy->getTypeParameter(0)); - assert(LayoutTy->getName().ends_with(".Layout") && "Not a layout type"); - - // Skip the "size" parameter. 
- size_t ParamIndex = Index + 1; - assert(LayoutTy->getNumIntParameters() > ParamIndex && - "Not enough parameters"); + const StructLayout *SL = DL.getStructLayout(LayoutTy); + for (int I = 0, E = LayoutTy->getNumElements(); I < E; ++I) + if (!IsPadding(LayoutTy->getElementType(I))) + Offsets.push_back(SL->getElementOffset(I)); - return LayoutTy->getIntParameter(ParamIndex); + return Offsets; } -std::optional CBufferMetadata::get(Module &M) { +std::optional +CBufferMetadata::get(Module &M, llvm::function_ref IsPadding) { NamedMDNode *CBufMD = M.getNamedMetadata("hlsl.cbs"); if (!CBufMD) return std::nullopt; @@ -43,17 +46,25 @@ std::optional CBufferMetadata::get(Module &M) { for (const MDNode *MD : CBufMD->operands()) { assert(MD->getNumOperands() && "Invalid cbuffer metadata"); - auto *Handle = cast( - cast(MD->getOperand(0))->getValue()); + // For an unused cbuffer, the handle may have been optimized out + Metadata *OpMD = MD->getOperand(0); + if (!OpMD) + continue; + + auto *Handle = + cast(cast(OpMD)->getValue()); CBufferMapping &Mapping = Result->Mappings.emplace_back(Handle); + SmallVector MemberOffsets = + getMemberOffsets(M.getDataLayout(), Handle, IsPadding); + for (int I = 1, E = MD->getNumOperands(); I < E; ++I) { Metadata *OpMD = MD->getOperand(I); // Some members may be null if they've been optimized out.
if (!OpMD) continue; auto *V = cast(cast(OpMD)->getValue()); - Mapping.Members.emplace_back(V, getMemberOffset(Handle, I - 1)); + Mapping.Members.emplace_back(V, MemberOffsets[I - 1]); } } @@ -64,10 +75,3 @@ void CBufferMetadata::eraseFromModule() { // Remove the cbs named metadata MD->eraseFromParent(); } - -APInt hlsl::translateCBufArrayOffset(const DataLayout &DL, APInt Offset, - ArrayType *Ty) { - int64_t TypeSize = DL.getTypeSizeInBits(Ty->getElementType()) / 8; - int64_t RoundUp = alignTo(TypeSize, Align(CBufferRowSizeInBytes)); - return Offset.udiv(TypeSize) * RoundUp; -} diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp index 0e9535d24a4cc..b060a6c4f3bf6 100644 --- a/llvm/lib/IR/Type.cpp +++ b/llvm/lib/IR/Type.cpp @@ -1008,6 +1008,10 @@ static TargetTypeInfo getTargetTypeInfo(const TargetExtType *Ty) { } if (Name == "spirv.IntegralConstant" || Name == "spirv.Literal") return TargetTypeInfo(Type::getVoidTy(C)); + if (Name == "spirv.Padding") + return TargetTypeInfo( + ArrayType::get(Type::getInt8Ty(C), Ty->getIntParameter(0)), + TargetExtType::CanBeGlobal); if (Name.starts_with("spirv.")) return TargetTypeInfo(PointerType::get(C, 0), TargetExtType::HasZeroInit, TargetExtType::CanBeGlobal, diff --git a/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp b/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp index 44277971acd60..56245321a522a 100644 --- a/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp +++ b/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp @@ -8,11 +8,13 @@ #include "DXILCBufferAccess.h" #include "DirectX.h" +#include "llvm/Analysis/DXILResource.h" #include "llvm/Frontend/HLSL/CBuffer.h" #include "llvm/Frontend/HLSL/HLSLResource.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/IntrinsicsDirectX.h" +#include "llvm/IR/ReplaceConstant.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/FormatVariadic.h" @@ -21,297 +23,41 @@ #define DEBUG_TYPE "dxil-cbuffer-access" using namespace llvm; 
-namespace { -/// Helper for building a `load.cbufferrow` intrinsic given a simple type. -struct CBufferRowIntrin { - Intrinsic::ID IID; - Type *RetTy; - unsigned int EltSize; - unsigned int NumElts; - - CBufferRowIntrin(const DataLayout &DL, Type *Ty) { - assert(Ty == Ty->getScalarType() && "Expected scalar type"); - - switch (DL.getTypeSizeInBits(Ty)) { - case 16: - IID = Intrinsic::dx_resource_load_cbufferrow_8; - RetTy = StructType::get(Ty, Ty, Ty, Ty, Ty, Ty, Ty, Ty); - EltSize = 2; - NumElts = 8; - break; - case 32: - IID = Intrinsic::dx_resource_load_cbufferrow_4; - RetTy = StructType::get(Ty, Ty, Ty, Ty); - EltSize = 4; - NumElts = 4; - break; - case 64: - IID = Intrinsic::dx_resource_load_cbufferrow_2; - RetTy = StructType::get(Ty, Ty); - EltSize = 8; - NumElts = 2; - break; - default: - llvm_unreachable("Only 16, 32, and 64 bit types supported"); - } - } -}; - -// Helper for creating CBuffer handles and loading data from them -struct CBufferResource { - GlobalVariable *GVHandle; - GlobalVariable *Member; - size_t MemberOffset; - - LoadInst *Handle; - - CBufferResource(GlobalVariable *GVHandle, GlobalVariable *Member, - size_t MemberOffset) - : GVHandle(GVHandle), Member(Member), MemberOffset(MemberOffset) {} - - const DataLayout &getDataLayout() { return GVHandle->getDataLayout(); } - Type *getValueType() { return Member->getValueType(); } - iterator_range users() { - return Member->users(); - } - - /// Get the byte offset of a Pointer-typed Value * `Val` relative to Member. - /// `Val` can either be Member itself, or a GEP of a constant offset from - /// Member - size_t getOffsetForCBufferGEP(Value *Val) { - assert(isa(Val->getType()) && - "Expected a pointer-typed value"); - - if (Val == Member) - return 0; - - if (auto *GEP = dyn_cast(Val)) { - // Since we should always have a constant offset, we should only ever have - // a single GEP of indirection from the Global. 
- assert(GEP->getPointerOperand() == Member && - "Indirect access to resource handle"); - - const DataLayout &DL = getDataLayout(); - APInt ConstantOffset(DL.getIndexTypeSizeInBits(GEP->getType()), 0); - bool Success = GEP->accumulateConstantOffset(DL, ConstantOffset); - (void)Success; - assert(Success && "Offsets into cbuffer globals must be constant"); - - if (auto *ATy = dyn_cast(Member->getValueType())) - ConstantOffset = - hlsl::translateCBufArrayOffset(DL, ConstantOffset, ATy); - - return ConstantOffset.getZExtValue(); - } - - llvm_unreachable("Expected Val to be a GlobalVariable or GEP"); - } - - /// Create a handle for this cbuffer resource using the IRBuilder `Builder` - /// and sets the handle as the current one to use for subsequent calls to - /// `loadValue` - void createAndSetCurrentHandle(IRBuilder<> &Builder) { - Handle = Builder.CreateLoad(GVHandle->getValueType(), GVHandle, - GVHandle->getName()); +static void replaceUsersOfGlobal(GlobalVariable *Global, + GlobalVariable *HandleGV, size_t Offset) { + for (Use &U : make_early_inc_range(Global->uses())) { + auto UseInst = dyn_cast(U.getUser()); + // TODO: Constants? Metadata? 
+ assert(UseInst && "Non-instruction use of cbuffer"); + + IRBuilder<> Builder(UseInst); + LoadInst *Handle = Builder.CreateLoad(HandleGV->getValueType(), HandleGV, + HandleGV->getName()); + Value *Ptr = Builder.CreateIntrinsic( + Global->getType(), Intrinsic::dx_resource_getpointer, + ArrayRef{Handle, + ConstantInt::get(Builder.getInt32Ty(), Offset)}); + U.set(Ptr); } - /// Load a value of type `Ty` at offset `Offset` using the handle from the - /// last call to `createAndSetCurrentHandle` - Value *loadValue(IRBuilder<> &Builder, Type *Ty, size_t Offset, - const Twine &Name = "") { - assert(Handle && - "Expected a handle for this cbuffer global resource to be created " - "before loading a value from it"); - const DataLayout &DL = getDataLayout(); - - size_t TargetOffset = MemberOffset + Offset; - CBufferRowIntrin Intrin(DL, Ty->getScalarType()); - // The cbuffer consists of some number of 16-byte rows. - unsigned int CurrentRow = TargetOffset / hlsl::CBufferRowSizeInBytes; - unsigned int CurrentIndex = - (TargetOffset % hlsl::CBufferRowSizeInBytes) / Intrin.EltSize; - - auto *CBufLoad = Builder.CreateIntrinsic( - Intrin.RetTy, Intrin.IID, - {Handle, ConstantInt::get(Builder.getInt32Ty(), CurrentRow)}, nullptr, - Name + ".load"); - auto *Elt = Builder.CreateExtractValue(CBufLoad, {CurrentIndex++}, - Name + ".extract"); - - Value *Result = nullptr; - unsigned int Remaining = - ((DL.getTypeSizeInBits(Ty) / 8) / Intrin.EltSize) - 1; - - if (Remaining == 0) { - // We only have a single element, so we're done. - Result = Elt; - - // However, if we loaded a <1 x T>, then we need to adjust the type here. - if (auto *VT = dyn_cast(Ty)) { - assert(VT->getNumElements() == 1 && - "Can't have multiple elements here"); - Result = Builder.CreateInsertElement(PoisonValue::get(VT), Result, - Builder.getInt32(0), Name); - } - return Result; - } - - // Walk each element and extract it, wrapping to new rows as needed. 
- SmallVector Extracts{Elt}; - while (Remaining--) { - CurrentIndex %= Intrin.NumElts; - - if (CurrentIndex == 0) - CBufLoad = Builder.CreateIntrinsic( - Intrin.RetTy, Intrin.IID, - {Handle, ConstantInt::get(Builder.getInt32Ty(), ++CurrentRow)}, - nullptr, Name + ".load"); - - Extracts.push_back(Builder.CreateExtractValue(CBufLoad, {CurrentIndex++}, - Name + ".extract")); - } - - // Finally, we build up the original loaded value. - Result = PoisonValue::get(Ty); - for (int I = 0, E = Extracts.size(); I < E; ++I) - Result = - Builder.CreateInsertElement(Result, Extracts[I], Builder.getInt32(I), - Name + formatv(".upto{}", I)); - return Result; - } -}; - -} // namespace - -/// Replace load via cbuffer global with a load from the cbuffer handle itself. -static void replaceLoad(LoadInst *LI, CBufferResource &CBR, - SmallVectorImpl &DeadInsts) { - size_t Offset = CBR.getOffsetForCBufferGEP(LI->getPointerOperand()); - IRBuilder<> Builder(LI); - CBR.createAndSetCurrentHandle(Builder); - Value *Result = CBR.loadValue(Builder, LI->getType(), Offset, LI->getName()); - LI->replaceAllUsesWith(Result); - DeadInsts.push_back(LI); -} - -/// This function recursively copies N array elements from the cbuffer resource -/// CBR to the MemCpy Destination. Recursion is used to unravel multidimensional -/// arrays into a sequence of scalar/vector extracts and stores. 
-static void copyArrayElemsForMemCpy(IRBuilder<> &Builder, MemCpyInst *MCI, - CBufferResource &CBR, ArrayType *ArrTy, - size_t ArrOffset, size_t N, - const Twine &Name = "") { - const DataLayout &DL = MCI->getDataLayout(); - Type *ElemTy = ArrTy->getElementType(); - size_t ElemTySize = DL.getTypeAllocSize(ElemTy); - for (unsigned I = 0; I < N; ++I) { - size_t Offset = ArrOffset + I * ElemTySize; - - // Recursively copy nested arrays - if (ArrayType *ElemArrTy = dyn_cast(ElemTy)) { - copyArrayElemsForMemCpy(Builder, MCI, CBR, ElemArrTy, Offset, - ElemArrTy->getNumElements(), Name); - continue; - } - - // Load CBuffer value and store it in Dest - APInt CBufArrayOffset( - DL.getIndexTypeSizeInBits(MCI->getSource()->getType()), Offset); - CBufArrayOffset = - hlsl::translateCBufArrayOffset(DL, CBufArrayOffset, ArrTy); - Value *CBufferVal = - CBR.loadValue(Builder, ElemTy, CBufArrayOffset.getZExtValue(), Name); - Value *GEP = - Builder.CreateInBoundsGEP(Builder.getInt8Ty(), MCI->getDest(), - {Builder.getInt32(Offset)}, Name + ".dest"); - Builder.CreateStore(CBufferVal, GEP, MCI->isVolatile()); - } -} - -/// Replace memcpy from a cbuffer global with a memcpy from the cbuffer handle -/// itself. Assumes the cbuffer global is an array, and the length of bytes to -/// copy is divisible by array element allocation size. -/// The memcpy source must also be a direct cbuffer global reference, not a GEP. 
-static void replaceMemCpy(MemCpyInst *MCI, CBufferResource &CBR) { - - ArrayType *ArrTy = dyn_cast(CBR.getValueType()); - assert(ArrTy && "MemCpy lowering is only supported for array types"); - - // This assumption vastly simplifies the implementation - if (MCI->getSource() != CBR.Member) - reportFatalUsageError( - "Expected MemCpy source to be a cbuffer global variable"); - - ConstantInt *Length = dyn_cast(MCI->getLength()); - uint64_t ByteLength = Length->getZExtValue(); - - // If length to copy is zero, no memcpy is needed - if (ByteLength == 0) { - MCI->eraseFromParent(); - return; - } - - const DataLayout &DL = CBR.getDataLayout(); - - Type *ElemTy = ArrTy->getElementType(); - size_t ElemSize = DL.getTypeAllocSize(ElemTy); - assert(ByteLength % ElemSize == 0 && - "Length of bytes to MemCpy must be divisible by allocation size of " - "source/destination array elements"); - size_t ElemsToCpy = ByteLength / ElemSize; - - IRBuilder<> Builder(MCI); - CBR.createAndSetCurrentHandle(Builder); - - copyArrayElemsForMemCpy(Builder, MCI, CBR, ArrTy, 0, ElemsToCpy, - "memcpy." + MCI->getDest()->getName() + "." + - MCI->getSource()->getName()); - - MCI->eraseFromParent(); -} - -static void replaceAccessesWithHandle(CBufferResource &CBR) { - SmallVector DeadInsts; - - SmallVector ToProcess{CBR.users()}; - while (!ToProcess.empty()) { - User *Cur = ToProcess.pop_back_val(); - - // If we have a load instruction, replace the access. - if (auto *LI = dyn_cast(Cur)) { - replaceLoad(LI, CBR, DeadInsts); - continue; - } - - // If we have a memcpy instruction, replace it with multiple accesses and - // subsequent stores to the destination - if (auto *MCI = dyn_cast(Cur)) { - replaceMemCpy(MCI, CBR); - continue; - } - - // Otherwise, walk users looking for a load... 
- if (isa(Cur) || isa(Cur)) { - ToProcess.append(Cur->user_begin(), Cur->user_end()); - continue; - } - - llvm_unreachable("Unexpected user of Global"); - } - RecursivelyDeleteTriviallyDeadInstructions(DeadInsts); + Global->removeFromParent(); } static bool replaceCBufferAccesses(Module &M) { - std::optional CBufMD = hlsl::CBufferMetadata::get(M); + std::optional CBufMD = hlsl::CBufferMetadata::get( + M, [](Type *Ty) { return isa(Ty); }); if (!CBufMD) return false; + SmallVector CBufferGlobals; + for (const hlsl::CBufferMapping &Mapping : *CBufMD) + for (const hlsl::CBufferMember &Member : Mapping.Members) + CBufferGlobals.push_back(Member.GV); + convertUsersOfConstantsToInstructions(CBufferGlobals); + for (const hlsl::CBufferMapping &Mapping : *CBufMD) - for (const hlsl::CBufferMember &Member : Mapping.Members) { - CBufferResource CBR(Mapping.Handle, Member.GV, Member.Offset); - replaceAccessesWithHandle(CBR); - Member.GV->removeFromParent(); - } + for (const hlsl::CBufferMember &Member : Mapping.Members) + replaceUsersOfGlobal(Member.GV, Mapping.Handle, Member.Offset); CBufMD->eraseFromModule(); return true; diff --git a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp index 6579d3405cf39..e467e3c8a218c 100644 --- a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp +++ b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp @@ -10,6 +10,7 @@ #include "DirectX.h" #include "llvm/ADT/SetVector.h" #include "llvm/Analysis/DXILResource.h" +#include "llvm/Frontend/HLSL/HLSLResource.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" @@ -20,6 +21,7 @@ #include "llvm/IR/IntrinsicsDirectX.h" #include "llvm/IR/User.h" #include "llvm/InitializePasses.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/Transforms/Utils/ValueMapper.h" #define DEBUG_TYPE "dxil-resource-access" @@ -44,16 +46,28 @@ static Value *calculateGEPOffset(GetElementPtrInst *GEP, Value *PrevOffset, APInt 
ConstantOffset(DL.getIndexTypeSizeInBits(GEP->getType()), 0); if (GEP->accumulateConstantOffset(DL, ConstantOffset)) { APInt Scaled = ConstantOffset.udiv(ScalarSize); - return ConstantInt::get(Type::getInt32Ty(GEP->getContext()), Scaled); + return ConstantInt::get(DL.getIndexType(GEP->getType()), Scaled); } - auto IndexIt = GEP->idx_begin(); - assert(cast(IndexIt)->getZExtValue() == 0 && - "GEP is not indexing through pointer"); - ++IndexIt; - Value *Offset = *IndexIt; - assert(++IndexIt == GEP->idx_end() && "Too many indices in GEP"); - return Offset; + unsigned NumIndices = GEP->getNumIndices(); + + // If we have a single index we're indexing into a top level array. This + // generally only happens with cbuffers. + if (NumIndices == 1) + return *GEP->idx_begin(); + + // If we have two indices, this should be a simple access through a pointer. + if (NumIndices == 2) { + auto IndexIt = GEP->idx_begin(); + assert(cast(IndexIt)->getZExtValue() == 0 && + "GEP is not indexing through pointer"); + ++IndexIt; + Value *Offset = *IndexIt; + assert(++IndexIt == GEP->idx_end() && "Too many indices in GEP"); + return Offset; + } + + llvm_unreachable("Unhandled GEP structure for resource access"); } static void createTypedBufferStore(IntrinsicInst *II, StoreInst *SI, @@ -171,6 +185,123 @@ static void createRawLoad(IntrinsicInst *II, LoadInst *LI, Value *Offset) { LI->replaceAllUsesWith(V); } +namespace { +/// Helper for building a `load.cbufferrow` intrinsic given a simple type. 
+struct CBufferRowIntrin { + Intrinsic::ID IID; + Type *RetTy; + unsigned int EltSize; + unsigned int NumElts; + + CBufferRowIntrin(const DataLayout &DL, Type *Ty) { + assert(Ty == Ty->getScalarType() && "Expected scalar type"); + + switch (DL.getTypeSizeInBits(Ty)) { + case 16: + IID = Intrinsic::dx_resource_load_cbufferrow_8; + RetTy = StructType::get(Ty, Ty, Ty, Ty, Ty, Ty, Ty, Ty); + EltSize = 2; + NumElts = 8; + break; + case 32: + IID = Intrinsic::dx_resource_load_cbufferrow_4; + RetTy = StructType::get(Ty, Ty, Ty, Ty); + EltSize = 4; + NumElts = 4; + break; + case 64: + IID = Intrinsic::dx_resource_load_cbufferrow_2; + RetTy = StructType::get(Ty, Ty); + EltSize = 8; + NumElts = 2; + break; + default: + llvm_unreachable("Only 16, 32, and 64 bit types supported"); + } + } +}; +} // namespace + +static void createCBufferLoad(IntrinsicInst *II, LoadInst *LI, Value *Offset, + dxil::ResourceTypeInfo &RTI) { + const DataLayout &DL = LI->getDataLayout(); + + Type *Ty = LI->getType(); + assert(!isa(Ty) && "Structs not handled yet"); + CBufferRowIntrin Intrin(DL, Ty->getScalarType()); + + StringRef Name = LI->getName(); + Value *Handle = II->getOperand(0); + + IRBuilder<> Builder(LI); + + ConstantInt *GlobalOffset = dyn_cast(II->getOperand(1)); + assert(GlobalOffset && "CBuffer getpointer index must be constant"); + + unsigned int FixedOffset = GlobalOffset->getZExtValue(); + // If we have a further constant offset we can just fold it in to the fixed + // offset. + if (auto *ConstOffset = dyn_cast_if_present(Offset)) { + FixedOffset += ConstOffset->getZExtValue(); + Offset = nullptr; + } + + Value *CurrentRow = ConstantInt::get( + Builder.getInt32Ty(), FixedOffset / hlsl::CBufferRowSizeInBytes); + unsigned int CurrentIndex = + (FixedOffset % hlsl::CBufferRowSizeInBytes) / Intrin.EltSize; + + assert(!(CurrentIndex && Offset) && + "Dynamic indexing into elements of cbuffer rows is not supported"); + if (Offset) + CurrentRow = FixedOffset ? 
Builder.CreateAdd(CurrentRow, Offset) : Offset; + + auto *CBufLoad = Builder.CreateIntrinsic( + Intrin.RetTy, Intrin.IID, {Handle, CurrentRow}, nullptr, Name + ".load"); + auto *Elt = + Builder.CreateExtractValue(CBufLoad, {CurrentIndex++}, Name + ".extract"); + + unsigned int Remaining = + ((DL.getTypeSizeInBits(Ty) / 8) / Intrin.EltSize) - 1; + if (Remaining == 0) { + // We only have a single element, so we're done. + Value *Result = Elt; + + // However, if we loaded a <1 x T>, then we need to adjust the type. + if (auto *VT = dyn_cast(Ty)) { + assert(VT->getNumElements() == 1 && "Can't have multiple elements here"); + Result = Builder.CreateInsertElement(PoisonValue::get(VT), Result, + Builder.getInt32(0), Name); + } + LI->replaceAllUsesWith(Result); + return; + } + + // Walk each element and extract it, wrapping to new rows as needed. + SmallVector Extracts{Elt}; + while (Remaining--) { + CurrentIndex %= Intrin.NumElts; + + if (CurrentIndex == 0) { + CurrentRow = Builder.CreateAdd(CurrentRow, + ConstantInt::get(Builder.getInt32Ty(), 1)); + CBufLoad = Builder.CreateIntrinsic(Intrin.RetTy, Intrin.IID, + {Handle, CurrentRow}, nullptr, + Name + ".load"); + } + + Extracts.push_back(Builder.CreateExtractValue(CBufLoad, {CurrentIndex++}, + Name + ".extract")); + } + + // Finally, we build up the original loaded value. 
+ Value *Result = PoisonValue::get(Ty); + for (int I = 0, E = Extracts.size(); I < E; ++I) + Result = Builder.CreateInsertElement( + Result, Extracts[I], Builder.getInt32(I), Name + formatv(".upto{}", I)); + LI->replaceAllUsesWith(Result); +} + static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI, Value *Offset, dxil::ResourceTypeInfo &RTI) { switch (RTI.getResourceKind()) { @@ -179,6 +310,8 @@ static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI, Value *Offset, case dxil::ResourceKind::RawBuffer: case dxil::ResourceKind::StructuredBuffer: return createRawLoad(II, LI, Offset); + case dxil::ResourceKind::CBuffer: + return createCBufferLoad(II, LI, Offset, RTI); case dxil::ResourceKind::Texture1D: case dxil::ResourceKind::Texture2D: case dxil::ResourceKind::Texture2DMS: @@ -190,9 +323,8 @@ static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI, Value *Offset, case dxil::ResourceKind::TextureCubeArray: case dxil::ResourceKind::FeedbackTexture2D: case dxil::ResourceKind::FeedbackTexture2DArray: - case dxil::ResourceKind::CBuffer: case dxil::ResourceKind::TBuffer: - // TODO: handle these + reportFatalUsageError("Load not yet implemented for resource type"); return; case dxil::ResourceKind::Sampler: case dxil::ResourceKind::RTAccelerationStructure: diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp index bcf84403b2c0d..be3d762379fd9 100644 --- a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp +++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp @@ -109,9 +109,9 @@ class DirectXPassConfig : public TargetPassConfig { void addCodeGenPrepare() override { addPass(createDXILFinalizeLinkageLegacyPass()); addPass(createGlobalDCEPass()); + addPass(createDXILCBufferAccessLegacyPass()); addPass(createDXILResourceAccessLegacyPass()); addPass(createDXILIntrinsicExpansionLegacyPass()); - addPass(createDXILCBufferAccessLegacyPass()); addPass(createDXILDataScalarizationLegacyPass()); 
ScalarizerPassOptions DxilScalarOptions; DxilScalarOptions.ScalarizeLoadStore = true; diff --git a/llvm/lib/Target/SPIRV/SPIRVCBufferAccess.cpp b/llvm/lib/Target/SPIRV/SPIRVCBufferAccess.cpp index f7fb886e7391d..329774df554f4 100644 --- a/llvm/lib/Target/SPIRV/SPIRVCBufferAccess.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVCBufferAccess.cpp @@ -35,6 +35,7 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicsSPIRV.h" #include "llvm/IR/Module.h" +#include "llvm/IR/ReplaceConstant.h" #define DEBUG_TYPE "spirv-cbuffer-access" using namespace llvm; @@ -53,10 +54,21 @@ static Instruction *findHandleDef(GlobalVariable *HandleVar) { } static bool replaceCBufferAccesses(Module &M) { - std::optional CBufMD = hlsl::CBufferMetadata::get(M); + std::optional CBufMD = + hlsl::CBufferMetadata::get(M, [](Type *Ty) { + if (auto *TET = dyn_cast(Ty)) + return TET->getName() == "spirv.Padding"; + return false; + }); if (!CBufMD) return false; + SmallVector CBufferGlobals; + for (const hlsl::CBufferMapping &Mapping : *CBufMD) + for (const hlsl::CBufferMember &Member : Mapping.Members) + CBufferGlobals.push_back(Member.GV); + convertUsersOfConstantsToInstructions(CBufferGlobals); + for (const hlsl::CBufferMapping &Mapping : *CBufMD) { Instruction *HandleDef = findHandleDef(Mapping.Handle); if (!HandleDef) { @@ -80,12 +92,7 @@ static bool replaceCBufferAccesses(Module &M) { Value *GetPointerCall = Builder.CreateIntrinsic( PtrType, Intrinsic::spv_resource_getpointer, {HandleDef, IndexVal}); - // We cannot use replaceAllUsesWith here because some uses may be - // ConstantExprs, which cannot be replaced with non-constants. 
- SmallVector Users(MemberGV->users()); - for (User *U : Users) { - U->replaceUsesOfWith(MemberGV, GetPointerCall); - } + MemberGV->replaceAllUsesWith(GetPointerCall); } } diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp index a151fd2fbdb7a..b32856e836230 100644 --- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp @@ -835,9 +835,6 @@ Type *SPIRVEmitIntrinsics::deduceElementTypeHelper( if (Ty->isArrayTy()) Ty = Ty->getArrayElementType(); else { - TargetExtType *BufferTy = cast(Ty); - assert(BufferTy->getTargetExtName() == "spirv.Layout"); - Ty = BufferTy->getTypeParameter(0); assert(Ty && Ty->isStructTy()); uint32_t Index = cast(II->getOperand(1))->getZExtValue(); Ty = cast(Ty)->getElementType(Index); diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/array-typedgep.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/array-typedgep.ll deleted file mode 100644 index 52ad0f3df1aba..0000000000000 --- a/llvm/test/CodeGen/DirectX/CBufferAccess/array-typedgep.ll +++ /dev/null @@ -1,32 +0,0 @@ -; RUN: opt -S -dxil-cbuffer-access -mtriple=dxil--shadermodel6.3-library %s | FileCheck %s - -; cbuffer CB : register(b0) { -; float a1[3]; -; } -%__cblayout_CB = type <{ [3 x float] }> - -@CB.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 36, 0)) poison -; CHECK: @CB.cb = -; CHECK-NOT: external {{.*}} addrspace(2) global -@a1 = external addrspace(2) global [3 x float], align 4 - -; CHECK: define void @f -define void @f(ptr %dst) { -entry: - %CB.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 36, 0)) @llvm.dx.resource.handlefrombinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBs_36_0tt(i32 0, i32 0, i32 1, i32 0, ptr null) - store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 36, 0)) %CB.cb_h, ptr @CB.cb, align 4 - - ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb - ; CHECK: [[LOAD:%.*]] = call { float, float, 
float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1) - ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0 - ; CHECK: store float [[X]], ptr %dst - %a1 = load float, ptr addrspace(2) getelementptr inbounds ([3 x float], ptr addrspace(2) @a1, i32 0, i32 1), align 4 - store float %a1, ptr %dst, align 32 - - ret void -} - -; CHECK-NOT: !hlsl.cbs = -!hlsl.cbs = !{!0} - -!0 = !{ptr @CB.cb, ptr addrspace(2) @a1} diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/arrays.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/arrays.ll index db4e14c1336a6..47c9a094686fb 100644 --- a/llvm/test/CodeGen/DirectX/CBufferAccess/arrays.ll +++ b/llvm/test/CodeGen/DirectX/CBufferAccess/arrays.ll @@ -1,124 +1,54 @@ ; RUN: opt -S -dxil-cbuffer-access -mtriple=dxil--shadermodel6.3-library %s | FileCheck %s +; TODO: Remove datalayout. +; This hack forces dxil-compatible alignment of 3-element 32- and 64-bit vectors +target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64-v96:32:32-v192:64:64" + ; cbuffer CB : register(b0) { -; float a1[3]; -; double3 a2[2]; -; float16_t a3[2][2]; -; uint64_t a4[3]; -; int4 a5[2][3][4]; -; uint16_t a6[1]; -; int64_t a7[2]; -; bool a8[4]; +; float a1[3]; // offset 0, size 4 (+12) * 3 +; double3 a2[2]; // offset 48, size 24 (+8) * 2 +; float16_t a3[2][2]; // offset 112, size 2 (+14) * 4 ; } -%__cblayout_CB = type <{ [3 x float], [2 x <3 x double>], [2 x [2 x half]], [3 x i64], [2 x [3 x [4 x <4 x i32>]]], [1 x i16], [2 x i64], [4 x i32] }> +%__cblayout_CB = type <{ + <{ [2 x <{ float, target("dx.Padding", 12) }>], float }>, target("dx.Padding", 12), + <{ [1 x <{ <3 x double>, target("dx.Padding", 8) }>], <3 x double> }>, target("dx.Padding", 8), + <{ [3 x <{ half, target("dx.Padding", 14) }>], half }>, target("dx.Padding", 14), +}> -@CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 
224, 608, 624, 656)) poison +@CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison ; CHECK: @CB.cb = ; CHECK-NOT: external {{.*}} addrspace(2) global -@a1 = external local_unnamed_addr addrspace(2) global [3 x float], align 4 -@a2 = external local_unnamed_addr addrspace(2) global [2 x <3 x double>], align 32 -@a3 = external local_unnamed_addr addrspace(2) global [2 x [2 x half]], align 2 -@a4 = external local_unnamed_addr addrspace(2) global [3 x i64], align 8 -@a5 = external local_unnamed_addr addrspace(2) global [2 x [3 x [4 x <4 x i32>]]], align 16 -@a6 = external local_unnamed_addr addrspace(2) global [1 x i16], align 2 -@a7 = external local_unnamed_addr addrspace(2) global [2 x i64], align 8 -@a8 = external local_unnamed_addr addrspace(2) global [4 x i32], align 4 +@a1 = external local_unnamed_addr addrspace(2) global <{ [2 x <{ float, target("dx.Padding", 12) }>], float }>, align 4 +@a2 = external local_unnamed_addr addrspace(2) global <{ [1 x <{ <3 x double>, target("dx.Padding", 8) }>], <3 x double> }>, align 32 +@a3 = external local_unnamed_addr addrspace(2) global <{ [3 x <{ half, target("dx.Padding", 14) }>], half }>, align 2 ; CHECK: define void @f define void @f(ptr %dst) { entry: - %CB.cb_h.i.i = tail call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 608, 624, 656)) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null) - store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 608, 624, 656)) %CB.cb_h.i.i, ptr @CB.cb, align 4 + %CB.cb_h.i.i = tail call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null) + store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h.i.i, ptr @CB.cb, align 4 - ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb - ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", 
{{.*}})) [[CB]], i32 1) - ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0 - ; CHECK: store float [[X]], ptr %dst - %a1 = load float, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a1, i32 4), align 4 + ; CHECK: [[PTR:%.*]] = call ptr addrspace(2) @llvm.dx.resource.getpointer.{{.*}}(target("dx.CBuffer", %__cblayout_CB) {{%.*}}, i32 0) + ; CHECK: getelementptr inbounds nuw i8, ptr addrspace(2) [[PTR]], i32 16 + %a1 = load float, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a1, i32 16), align 4 store float %a1, ptr %dst, align 32 - ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb - ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 5) - ; CHECK: [[X:%.*]] = extractvalue { double, double } [[LOAD]], 0 - ; CHECK: [[Y:%.*]] = extractvalue { double, double } [[LOAD]], 1 - ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 6) - ; CHECK: [[Z:%.*]] = extractvalue { double, double } [[LOAD]], 0 - ; CHECK: [[VEC0:%.*]] = insertelement <3 x double> poison, double [[X]], i32 0 - ; CHECK: [[VEC1:%.*]] = insertelement <3 x double> [[VEC0]], double [[Y]], i32 1 - ; CHECK: [[VEC2:%.*]] = insertelement <3 x double> [[VEC1]], double [[Z]], i32 2 - ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 8 - ; CHECK: store <3 x double> [[VEC2]], ptr [[PTR]] + ; CHECK: [[PTR:%.*]] = call ptr addrspace(2) @llvm.dx.resource.getpointer.{{.*}}(target("dx.CBuffer", %__cblayout_CB) {{%.*}}, i32 48) + ; CHECK: getelementptr inbounds nuw i8, ptr addrspace(2) [[PTR]], i32 32 %a2 = load <3 x double>, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a2, i32 32), align 8 %a2.i = getelementptr inbounds nuw i8, ptr %dst, i32 8 store <3 x double> %a2, ptr %a2.i, align 32 - ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", 
{{.*}})), ptr @CB.cb - ; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 8) - ; CHECK: [[X:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 0 - ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 32 - ; CHECK: store half [[X]], ptr [[PTR]] - %a3 = load half, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a3, i32 6), align 2 + ; CHECK: [[PTR:%.*]] = call ptr addrspace(2) @llvm.dx.resource.getpointer.{{.*}}(target("dx.CBuffer", %__cblayout_CB) {{%.*}}, i32 112) + ; CHECK: getelementptr inbounds nuw i8, ptr addrspace(2) [[PTR]], i32 16 + %a3 = load half, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a3, i32 16), align 2 %a3.i = getelementptr inbounds nuw i8, ptr %dst, i32 32 store half %a3, ptr %a3.i, align 2 - ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb - ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 12) - ; CHECK: [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0 - ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 40 - ; CHECK: store i64 [[X]], ptr [[PTR]] - %a4 = load i64, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a4, i32 8), align 8 - %a4.i = getelementptr inbounds nuw i8, ptr %dst, i32 40 - store i64 %a4, ptr %a4.i, align 8 - - ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb - ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 26) - ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0 - ; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1 - ; CHECK: [[Z:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 2 - ; CHECK: [[A:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 3 - 
; CHECK: [[VEC0:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i32 0 - ; CHECK: [[VEC1:%.*]] = insertelement <4 x i32> [[VEC0]], i32 [[Y]], i32 1 - ; CHECK: [[VEC2:%.*]] = insertelement <4 x i32> [[VEC1]], i32 [[Z]], i32 2 - ; CHECK: [[VEC3:%.*]] = insertelement <4 x i32> [[VEC2]], i32 [[A]], i32 3 - ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 48 - ; CHECK: store <4 x i32> [[VEC3]], ptr [[PTR]] - %a5 = load <4 x i32>, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a5, i32 272), align 4 - %a5.i = getelementptr inbounds nuw i8, ptr %dst, i32 48 - store <4 x i32> %a5, ptr %a5.i, align 4 - - ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb - ; CHECK: [[LOAD:%.*]] = call { i16, i16, i16, i16, i16, i16, i16, i16 } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 38) - ; CHECK: [[X:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 0 - ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 64 - ; CHECK: store i16 [[X]], ptr [[PTR]] - %a6 = load i16, ptr addrspace(2) @a6, align 2 - %a6.i = getelementptr inbounds nuw i8, ptr %dst, i32 64 - store i16 %a6, ptr %a6.i, align 2 - - ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb - ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 40) - ; CHECK: [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0 - ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 72 - ; CHECK: store i64 [[X]], ptr [[PTR]] - %a7 = load i64, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a7, i32 8), align 8 - %a7.i = getelementptr inbounds nuw i8, ptr %dst, i32 72 - store i64 %a7, ptr %a7.i, align 8 - - ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb - ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) 
[[CB]], i32 42) - ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0 - ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 80 - ; CHECK: store i32 [[X]], ptr [[PTR]] - %a8 = load i32, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a8, i32 4), align 4, !range !1, !noundef !2 - %a8.i = getelementptr inbounds nuw i8, ptr %dst, i32 80 - store i32 %a8, ptr %a8.i, align 4 - ret void } ; CHECK-NOT: !hlsl.cbs = !hlsl.cbs = !{!0} -!0 = !{ptr @CB.cb, ptr addrspace(2) @a1, ptr addrspace(2) @a2, ptr addrspace(2) @a3, ptr addrspace(2) @a4, ptr addrspace(2) @a5, ptr addrspace(2) @a6, ptr addrspace(2) @a7, ptr addrspace(2) @a8} -!1 = !{i32 0, i32 2} -!2 = !{} +!0 = !{ptr @CB.cb, ptr addrspace(2) @a1, ptr addrspace(2) @a2, ptr addrspace(2) @a3} diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/float.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/float.ll deleted file mode 100644 index d7272b449166d..0000000000000 --- a/llvm/test/CodeGen/DirectX/CBufferAccess/float.ll +++ /dev/null @@ -1,25 +0,0 @@ -; RUN: opt -S -dxil-cbuffer-access -mtriple=dxil--shadermodel6.3-library %s | FileCheck %s - -%__cblayout_CB = type <{ float }> - -@CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 4, 0)) poison -; CHECK: @CB.cb = -; CHECK-NOT: external {{.*}} addrspace(2) global -@x = external local_unnamed_addr addrspace(2) global float, align 4 - -; CHECK: define void @f -define void @f(ptr %dst) { -entry: - ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb - ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0) - ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0 - ; CHECK: store float [[X]], ptr %dst - %x = load float, ptr addrspace(2) @x, align 4 - store float %x, ptr %dst, align 4 - ret void -} - -; CHECK-NOT: !hlsl.cbs = -!hlsl.cbs = !{!0} - -!0 = !{ptr @CB.cb, 
ptr addrspace(2) @x} diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/gep-ce-two-uses.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/gep-ce-two-uses.ll index abe087dbe6100..8ea6c1c459b5b 100644 --- a/llvm/test/CodeGen/DirectX/CBufferAccess/gep-ce-two-uses.ll +++ b/llvm/test/CodeGen/DirectX/CBufferAccess/gep-ce-two-uses.ll @@ -3,28 +3,24 @@ ; cbuffer CB : register(b0) { ; float a1[3]; ; } -%__cblayout_CB = type <{ [3 x float] }> +%__cblayout_CB = type <{ [2 x <{ float, [12 x i8] }>], float }> -@CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 36, 0)) poison +@CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison ; CHECK: @CB.cb = ; CHECK-NOT: external {{.*}} addrspace(2) global -@a1 = external local_unnamed_addr addrspace(2) global [3 x float], align 4 +@a1 = external addrspace(2) global <{ [2 x <{ float, [12 x i8] }>], float }>, align 4 ; CHECK: define void @f define void @f(ptr %dst) { entry: - ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb - ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1) - ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0 - ; CHECK: store float [[X]], ptr %dst - %a1 = load float, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a1, i32 4), align 4 + ; CHECK: [[PTR:%.*]] = call ptr addrspace(2) @llvm.dx.resource.getpointer.{{.*}}(target("dx.CBuffer", %__cblayout_CB) {{%.*}}, i32 0) + ; CHECK: getelementptr inbounds nuw i8, ptr addrspace(2) [[PTR]], i32 16 + %a1 = load float, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a1, i32 16), align 4 store float %a1, ptr %dst, align 32 - ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb - ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1) - 
; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0 - ; CHECK: store float [[X]], ptr %dst - %a2 = load float, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a1, i32 4), align 4 + ; CHECK: [[PTR:%.*]] = call ptr addrspace(2) @llvm.dx.resource.getpointer.{{.*}}(target("dx.CBuffer", %__cblayout_CB) {{%.*}}, i32 0) + ; CHECK: getelementptr inbounds nuw i8, ptr addrspace(2) [[PTR]], i32 16 + %a2 = load float, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a1, i32 16), align 4 store float %a2, ptr %dst, align 32 ret void diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/memcpy.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/memcpy.ll deleted file mode 100644 index f1486f974fb36..0000000000000 --- a/llvm/test/CodeGen/DirectX/CBufferAccess/memcpy.ll +++ /dev/null @@ -1,216 +0,0 @@ -; RUN: opt -S -dxil-cbuffer-access -mtriple=dxil--shadermodel6.3-library %s | FileCheck %s - -; cbuffer CB : register(b0) { -; float a1[3]; -; double3 a2[2]; -; float16_t a3[2][2]; -; uint64_t a4[3]; -; int2 a5[3][2]; -; uint16_t a6[1]; -; int64_t a7[2]; -; bool a8[4]; -; } -%__cblayout_CB = type <{ [3 x float], [2 x <3 x double>], [2 x [2 x half]], [3 x i64], [3 x [2 x <2 x i32>]], [1 x i16], [2 x i64], [4 x i32] }> - -@CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 272, 288, 320)) poison -@a1 = external local_unnamed_addr addrspace(2) global [3 x float], align 4 -@a2 = external local_unnamed_addr addrspace(2) global [2 x <3 x double>], align 32 -@a3 = external local_unnamed_addr addrspace(2) global [2 x [2 x half]], align 2 -@a4 = external local_unnamed_addr addrspace(2) global [3 x i64], align 8 -@a5 = external local_unnamed_addr addrspace(2) global [3 x [2 x <2 x i32>]], align 16 -@a6 = external local_unnamed_addr addrspace(2) global [1 x i16], align 2 -@a7 = external local_unnamed_addr addrspace(2) global [2 x i64], align 8 -@a8 = external 
local_unnamed_addr addrspace(2) global [4 x i32], align 4 - -; CHECK: define void @f( -define void @f(ptr %dst) { -entry: - %CB.cb_h.i.i = tail call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 272, 288, 320)) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null) - store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 272, 288, 320)) %CB.cb_h.i.i, ptr @CB.cb, align 4 - - %a1.copy = alloca [3 x float], align 4 - %a2.copy = alloca [2 x <3 x double>], align 32 - %a3.copy = alloca [2 x [2 x half]], align 2 - %a4.copy = alloca [3 x i64], align 8 - %a5.copy = alloca [3 x [2 x <2 x i32>]], align 16 - %a6.copy = alloca [1 x i16], align 2 - %a7.copy = alloca [2 x i64], align 8 - %a8.copy = alloca [4 x i32], align 4 - - ; Try copying no elements -; CHECK-NOT: memcpy - call void @llvm.memcpy.p0.p2.i32(ptr align 4 %a1.copy, ptr addrspace(2) align 4 @a1, i32 0, i1 false) - - ; Try copying only the first element -; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4 -; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0) -; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A1_COPY:%.*]], i32 0 -; CHECK: store float [[X]], ptr [[DEST]], align 4 - call void @llvm.memcpy.p0.p2.i32(ptr align 4 %a1.copy, ptr addrspace(2) align 4 @a1, i32 4, i1 false) - -; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4 -; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0) -; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A1_COPY:%.*]], i32 0 -; CHECK: store float [[X]], ptr [[DEST]], align 4 -; CHECK: 
[[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1) -; CHECK: [[Y:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A1_COPY]], i32 4 -; CHECK: store float [[Y]], ptr [[DEST]], align 4 -; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 2) -; CHECK: [[Z:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A1_COPY]], i32 8 -; CHECK: store float [[Z]], ptr [[DEST]], align 4 - call void @llvm.memcpy.p0.p2.i32(ptr align 4 %a1.copy, ptr addrspace(2) align 4 @a1, i32 12, i1 false) - -; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4 -; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 3) -; CHECK: [[X:%.*]] = extractvalue { double, double } [[LOAD]], 0 -; CHECK: [[Y:%.*]] = extractvalue { double, double } [[LOAD]], 1 -; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 4) -; CHECK: [[Z:%.*]] = extractvalue { double, double } [[LOAD]], 0 -; CHECK: [[UPTO0:%.*]] = insertelement <3 x double> poison, double [[X]], i32 0 -; CHECK: [[UPTO1:%.*]] = insertelement <3 x double> [[UPTO0]], double [[Y]], i32 1 -; CHECK: [[UPTO2:%.*]] = insertelement <3 x double> [[UPTO1]], double [[Z]], i32 2 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A2_COPY:%.*]], i32 0 -; CHECK: store <3 x double> [[UPTO2]], ptr [[DEST]], align 8 -; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 5) -; CHECK: [[X:%.*]] = extractvalue { double, double } [[LOAD]], 0 -; CHECK: [[Y:%.*]] = extractvalue { double, double 
} [[LOAD]], 1 -; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 6) -; CHECK: [[Z:%.*]] = extractvalue { double, double } [[LOAD]], 0 -; CHECK: [[UPTO0:%.*]] = insertelement <3 x double> poison, double [[X]], i32 0 -; CHECK: [[UPTO1:%.*]] = insertelement <3 x double> [[UPTO0]], double [[Y]], i32 1 -; CHECK: [[UPTO2:%.*]] = insertelement <3 x double> [[UPTO1]], double [[Z]], i32 2 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A2_COPY]], i32 24 -; CHECK: store <3 x double> [[UPTO2]], ptr [[DEST]], align 8 - call void @llvm.memcpy.p0.p2.i32(ptr align 32 %a2.copy, ptr addrspace(2) align 32 @a2, i32 48, i1 false) - -; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4 -; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 7) -; CHECK: [[X:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 0 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A3_COPY:%.*]], i32 0 -; CHECK: store half [[X]], ptr [[DEST]], align 2 -; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 8) -; CHECK: [[Y:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 0 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A3_COPY]], i32 2 -; CHECK: store half [[Y]], ptr [[DEST]], align 2 -; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 9) -; CHECK: [[X:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 0 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A3_COPY]], i32 4 -; CHECK: store half [[X]], ptr [[DEST]], align 2 -; CHECK: [[LOAD:%.*]] = 
call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 10) -; CHECK: [[Y:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 0 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A3_COPY]], i32 6 -; CHECK: store half [[Y]], ptr [[DEST]], align 2 - call void @llvm.memcpy.p0.p2.i32(ptr align 2 %a3.copy, ptr addrspace(2) align 2 @a3, i32 8, i1 false) - -; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4 -; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 11) -; CHECK: [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A4_COPY:%.*]], i32 0 -; CHECK: store i64 [[X]], ptr [[DEST]], align 8 -; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 12) -; CHECK: [[Y:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A4_COPY]], i32 8 -; CHECK: store i64 [[Y]], ptr [[DEST]], align 8 -; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 13) -; CHECK: [[Z:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A4_COPY]], i32 16 -; CHECK: store i64 [[Z]], ptr [[DEST]], align 8 - call void @llvm.memcpy.p0.p2.i32(ptr align 8 %a4.copy, ptr addrspace(2) align 8 @a4, i32 24, i1 false) - -; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4 -; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 14) -; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0 -; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1 -; CHECK: [[UPTO0:%.*]] = 
insertelement <2 x i32> poison, i32 [[X]], i32 0 -; CHECK: [[UPTO1:%.*]] = insertelement <2 x i32> [[UPTO0]], i32 [[Y]], i32 1 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A5_COPY:%.*]], i32 0 -; CHECK: store <2 x i32> [[UPTO1]], ptr [[DEST]], align 8 -; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 15) -; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0 -; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1 -; CHECK: [[UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i32 0 -; CHECK: [[UPTO1:%.*]] = insertelement <2 x i32> [[UPTO0]], i32 [[Y]], i32 1 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A5_COPY]], i32 8 -; CHECK: store <2 x i32> [[UPTO1]], ptr [[DEST]], align 8 -; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 16) -; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0 -; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1 -; CHECK: [[UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i32 0 -; CHECK: [[UPTO1:%.*]] = insertelement <2 x i32> [[UPTO0]], i32 [[Y]], i32 1 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A5_COPY]], i32 16 -; CHECK: store <2 x i32> [[UPTO1]], ptr [[DEST]], align 8 -; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 17) -; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0 -; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1 -; CHECK: [[UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i32 0 -; CHECK: [[UPTO1:%.*]] = insertelement <2 x i32> [[UPTO0]], i32 [[Y]], i32 1 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A5_COPY]], i32 24 -; CHECK: store <2 x i32> [[UPTO1]], ptr [[DEST]], align 8 -; CHECK: [[LOAD:%.*]] = 
call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 18) -; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0 -; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1 -; CHECK: [[UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i32 0 -; CHECK: [[UPTO1:%.*]] = insertelement <2 x i32> [[UPTO0]], i32 [[Y]], i32 1 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A5_COPY]], i32 32 -; CHECK: store <2 x i32> [[UPTO1]], ptr [[DEST]], align 8 -; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 19) -; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0 -; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1 -; CHECK: [[UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i32 0 -; CHECK: [[UPTO1:%.*]] = insertelement <2 x i32> [[UPTO0]], i32 [[Y]], i32 1 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A5_COPY]], i32 40 -; CHECK: store <2 x i32> [[UPTO1]], ptr [[DEST]], align 8 - call void @llvm.memcpy.p0.p2.i32(ptr align 16 %a5.copy, ptr addrspace(2) align 16 @a5, i32 48, i1 false) - -; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4 -; CHECK: [[LOAD:%.*]] = call { i16, i16, i16, i16, i16, i16, i16, i16 } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 17) -; CHECK: [[X:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 0 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A6_COPY:%.*]], i32 0 -; CHECK: store i16 [[X]], ptr [[DEST]], align 2 - call void @llvm.memcpy.p0.p2.i32(ptr align 2 %a6.copy, ptr addrspace(2) align 2 @a6, i32 2, i1 false) - -; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4 -; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 18) 
-; CHECK: [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A7_COPY:%.*]], i32 0 -; CHECK: store i64 [[X]], ptr [[DEST]], align 8 -; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 19) -; CHECK: [[Y:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A7_COPY]], i32 8 -; CHECK: store i64 [[Y]], ptr [[DEST]], align 8 - call void @llvm.memcpy.p0.p2.i32(ptr align 8 %a7.copy, ptr addrspace(2) align 8 @a7, i32 16, i1 false) - -; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4 -; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 20) -; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A8_COPY:%.*]], i32 0 -; CHECK: store i32 [[X]], ptr [[DEST]], align 4 -; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 21) -; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A8_COPY]], i32 4 -; CHECK: store i32 [[Y]], ptr [[DEST]], align 4 -; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 22) -; CHECK: [[Z:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A8_COPY]], i32 8 -; CHECK: store i32 [[Z]], ptr [[DEST]], align 4 -; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 23) -; CHECK: [[W:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A8_COPY]], i32 12 -; CHECK: 
store i32 [[W]], ptr [[DEST]], align 4 - call void @llvm.memcpy.p0.p2.i32(ptr align 4 %a8.copy, ptr addrspace(2) align 4 @a8, i32 16, i1 false) - - ret void -} - -declare void @llvm.memcpy.p0.p2.i32(ptr noalias writeonly captures(none), ptr addrspace(2) noalias readonly captures(none), i32, i1 immarg) - -; CHECK-NOT: !hlsl.cbs = -!hlsl.cbs = !{!0} - -!0 = !{ptr @CB.cb, ptr addrspace(2) @a1, ptr addrspace(2) @a2, ptr addrspace(2) @a3, ptr addrspace(2) @a4, ptr addrspace(2) @a5, ptr addrspace(2) @a6, ptr addrspace(2) @a7, ptr addrspace(2) @a8} -!1 = !{i32 0, i32 2} -!2 = !{} diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/scalars.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/scalars.ll index 7857c25d69636..7cc0faac617c7 100644 --- a/llvm/test/CodeGen/DirectX/CBufferAccess/scalars.ll +++ b/llvm/test/CodeGen/DirectX/CBufferAccess/scalars.ll @@ -4,97 +4,40 @@ ; float a1; // offset 0, size 4 ; int a2; // offset 4, size 4 ; bool a3; // offset 8, size 4 -; float16_t a4; // offset 12, size 2 -; uint16_t a5; // offset 14, size 2 -; double a6; // offset 16, size 8 -; int64_t a7; // offset 24, size 8 ; } -%__cblayout_CB = type <{ float, i32, i32, half, i16, double, i64 }> +%__cblayout_CB = type <{ float, i32, i32 }> -@CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 32, 0, 4, 8, 12, 14, 16, 24)) poison +@CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison ; CHECK: @CB.cb = ; CHECK-NOT: external {{.*}} addrspace(2) global @a1 = external local_unnamed_addr addrspace(2) global float, align 4 @a2 = external local_unnamed_addr addrspace(2) global i32, align 4 @a3 = external local_unnamed_addr addrspace(2) global i32, align 4 -@a4 = external local_unnamed_addr addrspace(2) global half, align 2 -@a5 = external local_unnamed_addr addrspace(2) global i16, align 2 -@a6 = external local_unnamed_addr addrspace(2) global double, align 8 -@a7 = external local_unnamed_addr addrspace(2) global i64, align 8 ; CHECK: define 
void @f define void @f(ptr %dst) { entry: - %CB.cb_h.i.i = tail call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 32, 0, 4, 8, 12, 14, 16, 24)) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null) - store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 32, 0, 4, 8, 12, 14, 16, 24)) %CB.cb_h.i.i, ptr @CB.cb, align 4 + %CB.cb_h.i.i = tail call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null) + store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h.i.i, ptr @CB.cb, align 4 - ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb - ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0) - ; CHECK: [[A1:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0 - ; CHECK: store float [[A1]], ptr %dst + ; CHECK: call ptr addrspace(2) @llvm.dx.resource.getpointer.{{.*}}(target("dx.CBuffer", %__cblayout_CB) {{%.*}}, i32 0) %a1 = load float, ptr addrspace(2) @a1, align 4 store float %a1, ptr %dst, align 8 - ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb - ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0) - ; CHECK: [[A2:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1 - ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 4 - ; CHECK: store i32 [[A2]], ptr [[PTR]] + ; CHECK: call ptr addrspace(2) @llvm.dx.resource.getpointer.{{.*}}(target("dx.CBuffer", %__cblayout_CB) {{%.*}}, i32 4) %a2 = load i32, ptr addrspace(2) @a2, align 4 %a2.i = getelementptr inbounds nuw i8, ptr %dst, i32 4 store i32 %a2, ptr %a2.i, align 4 - ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb - ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0) - ; 
CHECK: [[A3:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 2 - ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 8 - ; CHECK: store i32 [[A3]], ptr [[PTR]] + ; CHECK: call ptr addrspace(2) @llvm.dx.resource.getpointer.{{.*}}(target("dx.CBuffer", %__cblayout_CB) {{%.*}}, i32 8) %a3 = load i32, ptr addrspace(2) @a3, align 4, !range !1, !noundef !2 %a3.i = getelementptr inbounds nuw i8, ptr %dst, i32 8 store i32 %a3, ptr %a3.i, align 8 - ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb - ; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0) - ; CHECK: [[A4:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 6 - ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 12 - ; CHECK: store half [[A4]], ptr [[PTR]] - %a4 = load half, ptr addrspace(2) @a4, align 2 - %a4.i = getelementptr inbounds nuw i8, ptr %dst, i32 12 - store half %a4, ptr %a4.i, align 4 - - ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb - ; CHECK: [[LOAD:%.*]] = call { i16, i16, i16, i16, i16, i16, i16, i16 } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0) - ; CHECK: [[A5:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 7 - ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 14 - ; CHECK: store i16 [[A5]], ptr [[PTR]] - %a5 = load i16, ptr addrspace(2) @a5, align 2 - %a5.i = getelementptr inbounds nuw i8, ptr %dst, i32 14 - store i16 %a5, ptr %a5.i, align 2 - - ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb - ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1) - ; CHECK: [[A6:%.*]] = extractvalue { double, double } [[LOAD]], 0 - ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 16 - 
; CHECK: store double [[A6]], ptr [[PTR]] - %a6 = load double, ptr addrspace(2) @a6, align 8 - %a6.i = getelementptr inbounds nuw i8, ptr %dst, i32 16 - store double %a6, ptr %a6.i, align 8 - - ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb - ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1) - ; CHECK: [[A7:%.*]] = extractvalue { i64, i64 } [[LOAD]], 1 - ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 24 - ; CHECK: store i64 [[A7]], ptr [[PTR]] - %a7 = load i64, ptr addrspace(2) @a7, align 8 - %a7.i = getelementptr inbounds nuw i8, ptr %dst, i32 24 - store i64 %a7, ptr %a7.i, align 8 - ret void } ; CHECK-NOT: !hlsl.cbs = !hlsl.cbs = !{!0} -!0 = !{ptr @CB.cb, ptr addrspace(2) @a1, ptr addrspace(2) @a2, ptr addrspace(2) @a3, ptr addrspace(2) @a4, ptr addrspace(2) @a5, ptr addrspace(2) @a6, ptr addrspace(2) @a7} -!1 = !{i32 0, i32 2} -!2 = !{} +!0 = !{ptr @CB.cb, ptr addrspace(2) @a1, ptr addrspace(2) @a2, ptr addrspace(2) @a3} diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/unused.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/unused.ll new file mode 100644 index 0000000000000..8c0d82e43b4b1 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/CBufferAccess/unused.ll @@ -0,0 +1,13 @@ +; RUN: opt -S -dxil-cbuffer-access -mtriple=dxil--shadermodel6.3-library %s | FileCheck %s +; Check that we correctly ignore cbuffers that were nulled out by optimizations. 
+ +%__cblayout_CB = type <{ float }> +@CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison +@x = external local_unnamed_addr addrspace(2) global float, align 4 + +; CHECK-NOT: !hlsl.cbs = +!hlsl.cbs = !{!0, !1, !2} + +!0 = !{ptr @CB.cb, ptr addrspace(2) @x} +!1 = !{ptr @CB.cb, null} +!2 = !{null, null} diff --git a/llvm/test/CodeGen/DirectX/Metadata/cbuffer-layouttype.ll b/llvm/test/CodeGen/DirectX/Metadata/cbuffer-layouttype.ll deleted file mode 100644 index 85952c9ae4e83..0000000000000 --- a/llvm/test/CodeGen/DirectX/Metadata/cbuffer-layouttype.ll +++ /dev/null @@ -1,82 +0,0 @@ -; TODO: Remove this test once we've updated the frontend to use explicit -; padding. The cbuffer-metadata.ll test covers the newer logic. - -; RUN: opt -S -dxil-translate-metadata < %s | FileCheck %s -; RUN: opt -S --passes="dxil-pretty-printer" < %s 2>&1 | FileCheck %s --check-prefix=PRINT -; RUN: llc %s --filetype=asm -o - < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,PRINT - -target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" -target triple = "dxil-pc-shadermodel6.6-compute" - -%__cblayout_CB1 = type <{ float, i32, double, <2 x i32> }> -@CB1.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB1, 24, 0, 4, 8, 16)) poison -@CB1.str = private unnamed_addr constant [4 x i8] c"CB1\00", align 1 - -%__cblayout_CB2 = type <{ float, double, float, half, i16, i64, i32 }> -@CB2.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB2, 36, 0, 8, 16, 20, 22, 24, 32)) poison -@CB2.str = private unnamed_addr constant [4 x i8] c"CB2\00", align 1 - -%__cblayout_MyConstants = type <{ double, <3 x float>, float, <3 x double>, half, <2 x double>, float, <3 x half>, <3 x half> }> -@MyConstants.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_MyConstants, 96, 0, 16, 28, 32, 56, 64, 80, 84, 90)) poison -@MyConstants.str = private unnamed_addr constant [12 x i8] c"MyConstants\00", align 1 - 
-; PRINT:; Resource Bindings: -; PRINT-NEXT:; -; PRINT-NEXT:; Name Type Format Dim ID HLSL Bind Count -; PRINT-NEXT:; ---- -; PRINT-NEXT:; CB1 cbuffer NA NA CB0 cb0 1 -; PRINT-NEXT:; CB2 cbuffer NA NA CB1 cb1 1 -; PRINT-NEXT:; MyConstants cbuffer NA NA CB2 cb5,space15 1 - -define void @test() #0 { - - ; cbuffer CB1 : register(b0) { - ; float a; - ; int b; - ; double c; - ; int2 d; - ; } - %CB1.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB1, 24, 0, 4, 8, 16)) - @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr @CB1.str) - ; cbuffer CB2 : register(b0) { - ; float a; - ; double b; - ; float c; - ; half d; - ; uint16_t e; - ; int64_t f; - ; int g; - ;} - - %CB2.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB2, 36, 0, 8, 16, 20, 22, 24, 32)) - @llvm.dx.resource.handlefrombinding(i32 0, i32 1, i32 1, i32 0, ptr @CB2.str) - ; cbuffer CB3 : register(b5) { - ; double B0; - ; float3 B1; - ; float B2; - ; double3 B3; - ; half B4; - ; double2 B5; - ; float B6; - ; half3 B7; - ; half3 B8; - ; } - %CB3.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_MyConstants, 96, 0, 16, 28, 32, 56, 64, 80, 84, 90)) - @llvm.dx.resource.handlefrombinding(i32 15, i32 5, i32 1, i32 0, ptr @MyConstants.str) - - ret void -} - -attributes #0 = { noinline nounwind "hlsl.shader"="compute" } - -; CHECK: @CB1 = external constant %CBuffer.CB1 -; CHECK: @CB2 = external constant %CBuffer.CB2 -; CHECK: @MyConstants = external constant %CBuffer.MyConstants - -; CHECK: !dx.resources = !{[[ResList:[!][0-9]+]]} - -; CHECK: [[ResList]] = !{null, null, [[CBList:[!][0-9]+]], null} -; CHECK: [[CBList]] = !{![[CB1:[0-9]+]], ![[CB2:[0-9]+]], ![[MYCONSTANTS:[0-9]+]]} -; CHECK: ![[CB1]] = !{i32 0, ptr @CB1, !"CB1", i32 0, i32 0, i32 1, i32 24, null} -; CHECK: ![[CB2]] = !{i32 1, ptr @CB2, !"CB2", i32 0, i32 1, i32 1, i32 36, null} -; CHECK: ![[MYCONSTANTS]] = !{i32 2, ptr @MyConstants, !"MyConstants", i32 15, i32 5, i32 1, i32 96, null} diff 
--git a/llvm/test/CodeGen/DirectX/Metadata/resource-symbols.ll b/llvm/test/CodeGen/DirectX/Metadata/resource-symbols.ll index 4f13f4789cd66..56798c8382d45 100644 --- a/llvm/test/CodeGen/DirectX/Metadata/resource-symbols.ll +++ b/llvm/test/CodeGen/DirectX/Metadata/resource-symbols.ll @@ -28,6 +28,11 @@ define void @test() { @llvm.dx.resource.handlefrombinding(i32 0, i32 10, i32 1, i32 0, ptr @SB.str) ; CHECK: %"StructuredBuffer" = type { %struct.S } + ; StructuredBuffer + %struct1 = call target("dx.RawBuffer", [3 x [2 x float]], 0, 0) + @llvm.dx.resource.handlefrombinding(i32 0, i32 12, i32 1, i32 0, ptr null) + ; CHECK: %"StructuredBuffer" = type { [3 x [2 x float]] } + ; ByteAddressBuffer %byteaddr = call target("dx.RawBuffer", i8, 0, 0) @llvm.dx.resource.handlefrombinding(i32 0, i32 20, i32 1, i32 0, ptr null) @@ -40,12 +45,14 @@ define void @test() { ; CHECK-NEXT: @[[T1:.*]] = external constant %"Buffer" ; CHECK-NEXT: @[[T2:.*]] = external constant %"Buffer" ; CHECK-NEXT: @[[S0:.*]] = external constant %"StructuredBuffer" +; CHECK-NEXT: @[[S1:.*]] = external constant %"StructuredBuffer" ; CHECK-NEXT: @[[B0:.*]] = external constant %ByteAddressBuffer ; CHECK: !{i32 0, ptr @[[T0]], !"A" ; CHECK: !{i32 1, ptr @[[T1]], !"" ; CHECK: !{i32 2, ptr @[[T2]], !"" ; CHECK: !{i32 3, ptr @[[S0]], !"SB" -; CHECK: !{i32 4, ptr @[[B0]], !"" +; CHECK: !{i32 4, ptr @[[S1]], !"" +; CHECK: !{i32 5, ptr @[[B0]], !"" attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) } diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-of-struct.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-of-struct.ll new file mode 100644 index 0000000000000..22fba8c1d5f8c --- /dev/null +++ b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-of-struct.ll @@ -0,0 +1,59 @@ +; RUN: opt -S -dxil-resource-access -mtriple=dxil %s | FileCheck %s +; +; Tests for indexed types in dynamically indexed arrays in cbuffers. 
+; +; struct S { +; float x[2]; +; uint q; +; }; +; cbuffer CB : register(b0) { +; uint32_t3 w[3]; // offset 0, size 12 (+4) * 3 +; S v[3]; // offset 48, size 24 (+8) * 3 +; } +%S = type <{ <{ [1 x <{ float, target("dx.Padding", 12) }>], float }>, i32 }> +%__cblayout_CB = type <{ + <{ + [2 x <{ <3 x i32>, target("dx.Padding", 4) }>], + <3 x i32> + }>, + target("dx.Padding", 4), + <{ + [2 x <{ %S, target("dx.Padding", 8) }>], %S + }> +}> + +@CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison + +; CHECK: define void @f +define void @f(ptr %dst, i32 %idx) { +entry: + %CB.cb_h = tail call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefromimplicitbinding(i32 1, i32 0, i32 1, i32 0, ptr null) + store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h, ptr @CB.cb, align 4 + + ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb + %CB.cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 4 + + ;; w[2].z + ; + ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 2) + ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 2 + ; CHECK: store i32 [[X]], ptr %dst + %w_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0) + %w_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %w_ptr, i32 40 + %w_load = load i32, ptr addrspace(2) %w_gep, align 4 + store i32 %w_load, ptr %dst, align 4 + + ;; v[2].q + ; + ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 8) + ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 4 + ; CHECK: store i32 [[X]], ptr [[PTR]] + %v_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 48) + %v_gep = 
getelementptr inbounds nuw i8, ptr addrspace(2) %v_ptr, i32 84 + %v_load = load i32, ptr addrspace(2) %v_gep, align 4 + %v.i = getelementptr inbounds nuw i8, ptr %dst, i32 4 + store i32 %v_load, ptr %v.i, align 4 + + ret void +} diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-of-vector.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-of-vector.ll new file mode 100644 index 0000000000000..cbd76fc00b813 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-of-vector.ll @@ -0,0 +1,45 @@ +; RUN: opt -S -dxil-resource-access -mtriple=dxil %s | FileCheck %s +; +; Test for when we have indices into both the array and the vector: ie, s[1][3] + +; cbuffer CB : register(b0) { +; uint4 s[2]; // offset 0, size 16 * 2 +; } +%__cblayout_CB = type <{ [2 x <4 x i32>] }> + +@CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison + +; CHECK: define void @f +define void @f(ptr %dst) { +entry: + %CB.cb_h = tail call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefromimplicitbinding(i32 1, i32 0, i32 1, i32 0, ptr null) + store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h, ptr @CB.cb, align 4 + + ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb + %CB.cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 4 + + ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1) + ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 3 + ; CHECK: store i32 [[X]], ptr %dst + %i8_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0) + %i8_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %i8_ptr, i32 28 + %i8_vecext = load i32, ptr addrspace(2) %i8_gep, align 4 + store i32 %i8_vecext, ptr %dst, align 4 + + ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } 
@llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1) + ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 3 + ;; + ;; It would be nice to avoid the redundant vector creation here, but that's + ;; outside of the scope of this pass. + ;; + ; CHECK: [[X_VEC:%.*]] = insertelement <4 x i32> {{%.*}}, i32 [[X]], i32 3 + ; CHECK: [[X_EXT:%.*]] = extractelement <4 x i32> [[X_VEC]], i32 3 + ; CHECK: store i32 [[X_EXT]], ptr %dst + %typed_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0) + %typed_gep = getelementptr <4 x i32>, ptr addrspace(2) %typed_ptr, i32 1 + %typed_load = load <4 x i32>, ptr addrspace(2) %typed_gep, align 16 + %typed_vecext = extractelement <4 x i32> %typed_load, i32 3 + store i32 %typed_vecext, ptr %dst, align 4 + + ret void +} diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-typedgep.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-typedgep.ll new file mode 100644 index 0000000000000..a561d8c0aea5d --- /dev/null +++ b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-typedgep.ll @@ -0,0 +1,27 @@ +; RUN: opt -S -dxil-resource-access -mtriple=dxil %s | FileCheck %s + +; cbuffer CB : register(b0) { +; float a1[3]; +; } +%__cblayout_CB = type <{ [2 x <{ float, [12 x i8] }>], float }> + +@CB.cb = global target("dx.CBuffer", %__cblayout_CB) poison + +; CHECK: define void @f +define void @f(ptr %dst) { +entry: + %CB.cb_h = call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null) + store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h, ptr @CB.cb, align 4 + + ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb + ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1) + ; CHECK: [[X:%.*]] = extractvalue { float, float, 
float, float } [[LOAD]], 0 + ; CHECK: store float [[X]], ptr %dst + %CB.cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 8 + %a1_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0) + %a1_gep = getelementptr inbounds <{ [2 x <{ float, [12 x i8] }>], float }>, ptr addrspace(2) %a1_ptr, i32 0, i32 0, i32 1 + %a1 = load float, ptr addrspace(2) %a1_gep, align 4 + store float %a1, ptr %dst, align 32 + + ret void +} diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-arrays.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-arrays.ll new file mode 100644 index 0000000000000..68dfbb184083f --- /dev/null +++ b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-arrays.ll @@ -0,0 +1,129 @@ +; RUN: opt -S -dxil-resource-access -mtriple=dxil %s | FileCheck %s + +; cbuffer CB : register(b0) { +; float a1[3]; // offset 0, size 4 (+12) * 3 +; double3 a2[2]; // offset 48, size 24 (+8) * 2 +; float16_t a3[2][2]; // offset 112, size 2 (+14) * 4 +; uint64_t a4[3]; // offset 176, size 8 (+8) * 3 +; int4 a5[2][3][4]; // offset 224, size 16 * 24 +; uint16_t a6[1]; // offset 608, size 2 (+14) * 1 +; int64_t a7[2]; // offset 624, size 8 (+8) * 2 +; bool a8[4]; // offset 656, size 4 (+12) * 4 +; } +%__cblayout_CB = type <{ + <{ [2 x <{ float, target("dx.Padding", 12) }>], float }>, target("dx.Padding", 12), + <{ [1 x <{ <3 x double>, target("dx.Padding", 8) }>], <3 x double> }>, target("dx.Padding", 8), + <{ [3 x <{ half, target("dx.Padding", 14) }>], half }>, target("dx.Padding", 14), + <{ [2 x <{ i64, target("dx.Padding", 8) }>], i64 }>, target("dx.Padding", 8), + [24 x <4 x i32>], + [1 x i16], target("dx.Padding", 14), + <{ [1 x <{ i64, target("dx.Padding", 8) }>], i64 }>, target("dx.Padding", 8), + <{ [3 x <{ i32, target("dx.Padding", 12) }>], i32 }> +}> + +@CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison + +; CHECK: define void @f +define void @f(ptr %dst) { 
+entry: + %CB.cb_h.i.i = tail call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null) + store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h.i.i, ptr @CB.cb, align 4 + + ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb + %CB.cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 4 + + ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1) + ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0 + ; CHECK: store float [[X]], ptr %dst + %a1_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0) + %a1_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %a1_ptr, i32 16 + %a1 = load float, ptr addrspace(2) %a1_gep, align 4 + store float %a1, ptr %dst, align 32 + + ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 5) + ; CHECK: [[X:%.*]] = extractvalue { double, double } [[LOAD]], 0 + ; CHECK: [[Y:%.*]] = extractvalue { double, double } [[LOAD]], 1 + ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 6) + ; CHECK: [[Z:%.*]] = extractvalue { double, double } [[LOAD]], 0 + ; CHECK: [[VEC0:%.*]] = insertelement <3 x double> poison, double [[X]], i32 0 + ; CHECK: [[VEC1:%.*]] = insertelement <3 x double> [[VEC0]], double [[Y]], i32 1 + ; CHECK: [[VEC2:%.*]] = insertelement <3 x double> [[VEC1]], double [[Z]], i32 2 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 8 + ; CHECK: store <3 x double> [[VEC2]], ptr [[PTR]] + %a2_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 48) + %a2_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %a2_ptr, i32 32 + 
%a2 = load <3 x double>, ptr addrspace(2) %a2_gep, align 8 + %a2.i = getelementptr inbounds nuw i8, ptr %dst, i32 8 + store <3 x double> %a2, ptr %a2.i, align 32 + + ; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 8) + ; CHECK: [[X:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 0 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 32 + ; CHECK: store half [[X]], ptr [[PTR]] + %a3_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 112) + %a3_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %a3_ptr, i32 16 + %a3 = load half, ptr addrspace(2) %a3_gep, align 2 + %a3.i = getelementptr inbounds nuw i8, ptr %dst, i32 32 + store half %a3, ptr %a3.i, align 2 + + ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 12) + ; CHECK: [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 40 + ; CHECK: store i64 [[X]], ptr [[PTR]] + %a4_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 176) + %a4_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %a4_ptr, i32 16 + %a4 = load i64, ptr addrspace(2) %a4_gep, align 8 + %a4.i = getelementptr inbounds nuw i8, ptr %dst, i32 40 + store i64 %a4, ptr %a4.i, align 8 + + ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 26) + ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0 + ; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1 + ; CHECK: [[Z:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 2 + ; CHECK: [[A:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 3 + ; CHECK: 
[[VEC0:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i32 0 + ; CHECK: [[VEC1:%.*]] = insertelement <4 x i32> [[VEC0]], i32 [[Y]], i32 1 + ; CHECK: [[VEC2:%.*]] = insertelement <4 x i32> [[VEC1]], i32 [[Z]], i32 2 + ; CHECK: [[VEC3:%.*]] = insertelement <4 x i32> [[VEC2]], i32 [[A]], i32 3 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 48 + ; CHECK: store <4 x i32> [[VEC3]], ptr [[PTR]] + %a5_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 224) + %a5_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %a5_ptr, i32 192 + %a5 = load <4 x i32>, ptr addrspace(2) %a5_gep, align 4 + %a5.i = getelementptr inbounds nuw i8, ptr %dst, i32 48 + store <4 x i32> %a5, ptr %a5.i, align 4 + + ; CHECK: [[LOAD:%.*]] = call { i16, i16, i16, i16, i16, i16, i16, i16 } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 38) + ; CHECK: [[X:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 0 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 64 + ; CHECK: store i16 [[X]], ptr [[PTR]] + %a6_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 608) + %a6 = load i16, ptr addrspace(2) %a6_ptr, align 2 + %a6.i = getelementptr inbounds nuw i8, ptr %dst, i32 64 + store i16 %a6, ptr %a6.i, align 2 + + ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 40) + ; CHECK: [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 72 + ; CHECK: store i64 [[X]], ptr [[PTR]] + %a7_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 624) + %a7_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %a7_ptr, i32 16 + %a7 = load i64, ptr addrspace(2) %a7_gep, align 8 + %a7.i = getelementptr 
inbounds nuw i8, ptr %dst, i32 72 + store i64 %a7, ptr %a7.i, align 8 + + ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 42) + ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 80 + ; CHECK: store i32 [[X]], ptr [[PTR]] + %a8_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 656) + %a8_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %a8_ptr, i32 16 + %a8 = load i32, ptr addrspace(2) %a8_gep, align 4, !range !0, !noundef !1 + %a8.i = getelementptr inbounds nuw i8, ptr %dst, i32 80 + store i32 %a8, ptr %a8.i, align 4 + + ret void +} + +!0 = !{i32 0, i32 2} +!1 = !{} diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-dynamic-struct.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-dynamic-struct.ll new file mode 100644 index 0000000000000..9c2ec2399d8d5 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-dynamic-struct.ll @@ -0,0 +1,64 @@ +; RUN: opt -S -dxil-resource-access -mtriple=dxil %s | FileCheck %s +; +; Tests for indexed types in dynamically indexed arrays in cbuffers. 
+; +; Bug https://github.com/llvm/llvm-project/issues/164517 +; XFAIL: * +; +; struct S { +; float x[2]; +; uint q; +; }; +; cbuffer CB : register(b0) { +; uint32_t3 w[3]; // offset 0, size 12 (+4) * 3 +; S v[3]; // offset 48, size 24 (+8) * 3 +; } +%S = type <{ <{ [1 x <{ float, target("dx.Padding", 12) }>], float }>, i32 }> +%__cblayout_CB = type <{ + <{ + [2 x <{ <3 x i32>, target("dx.Padding", 4) }>], + <3 x i32> + }>, + target("dx.Padding", 4), + <{ + [2 x <{ %S, target("dx.Padding", 8) }>], %S + }> +}> + +@CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison + +; CHECK: define void @f +define void @f(ptr %dst, i32 %idx) { +entry: + %CB.cb_h = tail call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefromimplicitbinding(i32 1, i32 0, i32 1, i32 0, ptr null) + store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h, ptr @CB.cb, align 4 + + ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb + %CB.cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 4 + + ;; w[Tid.x].z + ; + ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 %idx) + ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 2 + ; CHECK: store i32 [[X]], ptr %dst + %w_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0) + %w_arrayidx = getelementptr <3 x i32>, ptr addrspace(2) %w_ptr, i32 %idx + %w_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %w_arrayidx, i32 4 + %w_load = load i32, ptr addrspace(2) %w_gep, align 4 + store i32 %w_load, ptr %dst, align 4 + + ;; v[Tid.x].q + ; + ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 %idx) + ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 4 
+ ; CHECK: store i32 [[X]], ptr [[PTR]] + %v_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 48) + %v_arrayidx = getelementptr <{ %struct.S, target("dx.Padding", 4) }>, ptr addrspace(2) %v_ptr, i32 %idx + %v_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %v_arrayidx, i32 8 + %v_load = load i32, ptr addrspace(2) %v_gep, align 4 + %v.i = getelementptr inbounds nuw i8, ptr %dst, i32 4 + store i32 %v_load, ptr %v.i, align 4 + + ret void +} diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-dynamic.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-dynamic.ll new file mode 100644 index 0000000000000..1b486a1f40f22 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-dynamic.ll @@ -0,0 +1,42 @@ +; RUN: opt -S -dxil-resource-access -mtriple=dxil %s | FileCheck %s +; +; Tests for dynamic indices into arrays in cbuffers. + +; cbuffer CB : register(b0) { +; uint s[10]; // offset 0, size 4 (+12) * 10 +; uint t[10]; // offset 160, size 4 (+12) * 10 +; } +%__cblayout_CB = type <{ <{ [9 x <{ i32, target("dx.Padding", 12) }>], i32 }> }> + +@CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison + +; CHECK: define void @f +define void @f(ptr %dst, i32 %idx) { +entry: + %CB.cb_h = tail call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefromimplicitbinding(i32 1, i32 0, i32 1, i32 0, ptr null) + store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h, ptr @CB.cb, align 4 + + ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb + %CB.cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 4 + + ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 %idx) + ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0 + ; CHECK: store i32 [[X]], ptr %dst + %s_ptr = call ptr addrspace(2) 
@llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0) + %s_gep = getelementptr <{ i32, target("dx.Padding", 12) }>, ptr addrspace(2) %s_ptr, i32 %idx + %s_load = load i32, ptr addrspace(2) %s_gep, align 4 + store i32 %s_load, ptr %dst, align 4 + + ; CHECK: [[T_IDX:%.*]] = add i32 10, %idx + ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 [[T_IDX]]) + ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 4 + ; CHECK: store i32 [[X]], ptr [[PTR]] + %t_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 160) + %t_gep = getelementptr <{ i32, target("dx.Padding", 12) }>, ptr addrspace(2) %t_ptr, i32 %idx + %t_load = load i32, ptr addrspace(2) %t_gep, align 4 + %t.i = getelementptr inbounds nuw i8, ptr %dst, i32 4 + store i32 %t_load, ptr %t.i, align 4 + + ret void +} diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-scalars.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-scalars.ll new file mode 100644 index 0000000000000..5a8275b4a6c12 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-scalars.ll @@ -0,0 +1,87 @@ +; RUN: opt -S -dxil-resource-access -mtriple=dxil %s | FileCheck %s + +; cbuffer CB { +; float a1; // offset 0, size 4 +; int a2; // offset 4, size 4 +; bool a3; // offset 8, size 4 +; float16_t a4; // offset 12, size 2 +; uint16_t a5; // offset 14, size 2 +; double a6; // offset 16, size 8 +; int64_t a7; // offset 24, size 8 +; } +%__cblayout_CB = type <{ float, i32, i32, half, i16, double, i64 }> + +@CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison + +; CHECK: define void @f +define void @f(ptr %dst) { +entry: + %CB.cb_h.i.i = tail call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, 
i32 1, i32 0, ptr null) + store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h.i.i, ptr @CB.cb, align 4 + + ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb + %CB.cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 8 + + ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 0) + ; CHECK: [[A1:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0 + ; CHECK: store float [[A1]], ptr %dst + %a1_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0) + %a1 = load float, ptr addrspace(2) %a1_ptr, align 4 + store float %a1, ptr %dst, align 8 + + ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 0) + ; CHECK: [[A2:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 4 + ; CHECK: store i32 [[A2]], ptr [[PTR]] + %a2_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 4) + %a2 = load i32, ptr addrspace(2) %a2_ptr, align 4 + %a2.i = getelementptr inbounds nuw i8, ptr %dst, i32 4 + store i32 %a2, ptr %a2.i, align 8 + + ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 0) + ; CHECK: [[A3:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 2 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 8 + ; CHECK: store i32 [[A3]], ptr [[PTR]] + %a3_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 8) + %a3 = load i32, ptr addrspace(2) %a3_ptr, align 4 + %a3.i = getelementptr inbounds nuw i8, ptr %dst, i32 8 + store i32 %a3, ptr %a3.i, align 4 + + ; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, 
half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 0) + ; CHECK: [[A4:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 6 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 12 + ; CHECK: store half [[A4]], ptr [[PTR]] + %a4_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 12) + %a4 = load half, ptr addrspace(2) %a4_ptr, align 2 + %a4.i = getelementptr inbounds nuw i8, ptr %dst, i32 12 + store half %a4, ptr %a4.i, align 4 + + ; CHECK: [[LOAD:%.*]] = call { i16, i16, i16, i16, i16, i16, i16, i16 } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 0) + ; CHECK: [[A5:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 7 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 14 + ; CHECK: store i16 [[A5]], ptr [[PTR]] + %a5_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 14) + %a5 = load i16, ptr addrspace(2) %a5_ptr, align 2 + %a5.i = getelementptr inbounds nuw i8, ptr %dst, i32 14 + store i16 %a5, ptr %a5.i, align 2 + + ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1) + ; CHECK: [[A6:%.*]] = extractvalue { double, double } [[LOAD]], 0 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 16 + ; CHECK: store double [[A6]], ptr [[PTR]] + %a6_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 16) + %a6 = load double, ptr addrspace(2) %a6_ptr, align 8 + %a6.i = getelementptr inbounds nuw i8, ptr %dst, i32 16 + store double %a6, ptr %a6.i, align 8 + + ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1) + ; CHECK: [[A7:%.*]] = 
extractvalue { i64, i64 } [[LOAD]], 1 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 24 + ; CHECK: store i64 [[A7]], ptr [[PTR]] + %a7_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 24) + %a7 = load i64, ptr addrspace(2) %a7_ptr, align 8 + %a7.i = getelementptr inbounds nuw i8, ptr %dst, i32 24 + store i64 %a7, ptr %a7.i, align 8 + + ret void +} diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/vectors.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-vectors.ll similarity index 65% rename from llvm/test/CodeGen/DirectX/CBufferAccess/vectors.ll rename to llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-vectors.ll index 4160008a986af..1f20d0101c151 100644 --- a/llvm/test/CodeGen/DirectX/CBufferAccess/vectors.ll +++ b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-vectors.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -dxil-cbuffer-access -mtriple=dxil--shadermodel6.3-library %s | FileCheck %s +; RUN: opt -S -dxil-resource-access -mtriple=dxil %s | FileCheck %s ; cbuffer CB { ; float3 a1; // offset 0, size 12 (+4) @@ -8,26 +8,20 @@ ; int4 a5; // offset 80, size 16 ; uint16_t3 a6; // offset 96, size 6 (+10) ; }; -%__cblayout_CB = type <{ <3 x float>, <3 x double>, <2 x half>, <3 x i64>, <4 x i32>, <3 x i16> }> +%__cblayout_CB = type <{ <3 x float>, target("dx.Padding", 4), <3 x double>, <2 x half>, target("dx.Padding", 4), <3 x i64>, target("dx.Padding", 8), <4 x i32>, <3 x i16> }> -@CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 102, 0, 16, 40, 48, 80, 96)) poison -; CHECK: @CB.cb = -; CHECK-NOT: external {{.*}} addrspace(2) global -@a1 = external local_unnamed_addr addrspace(2) global <3 x float>, align 16 -@a2 = external local_unnamed_addr addrspace(2) global <3 x double>, align 32 -@a3 = external local_unnamed_addr addrspace(2) global <2 x half>, align 4 -@a4 = external local_unnamed_addr addrspace(2) global <3 x i64>, align 32 
-@a5 = external local_unnamed_addr addrspace(2) global <4 x i32>, align 16 -@a6 = external local_unnamed_addr addrspace(2) global <3 x i16>, align 8 +@CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison ; CHECK: define void @f define void @f(ptr %dst) { entry: - %CB.cb_h.i.i = tail call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 102, 0, 16, 40, 48, 80, 96)) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null) - store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 102, 0, 16, 40, 48, 80, 96)) %CB.cb_h.i.i, ptr @CB.cb, align 4 + %CB.cb_h.i.i = tail call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null) + store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h.i.i, ptr @CB.cb, align 4 - ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb - ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0) + ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb + %CB.cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 8 + + ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 0) ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0 ; CHECK: [[Y:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 1 ; CHECK: [[Z:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 2 @@ -35,53 +29,53 @@ entry: ; CHECK: [[VEC1:%.*]] = insertelement <3 x float> [[VEC0]], float [[Y]], i32 1 ; CHECK: [[VEC2:%.*]] = insertelement <3 x float> [[VEC1]], float [[Z]], i32 2 ; CHECK: store <3 x float> [[VEC2]], ptr %dst - %a1 = load <3 x float>, ptr addrspace(2) @a1, align 16 + %a1_gep = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0) + %a1 = load 
<3 x float>, ptr addrspace(2) %a1_gep, align 16 store <3 x float> %a1, ptr %dst, align 4 - ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb - ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1) + ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1) ; CHECK: [[X:%.*]] = extractvalue { double, double } [[LOAD]], 0 ; CHECK: [[Y:%.*]] = extractvalue { double, double } [[LOAD]], 1 - ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 2) + ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 2) ; CHECK: [[Z:%.*]] = extractvalue { double, double } [[LOAD]], 0 ; CHECK: [[VEC0:%.*]] = insertelement <3 x double> poison, double [[X]], i32 0 ; CHECK: [[VEC1:%.*]] = insertelement <3 x double> [[VEC0]], double [[Y]], i32 1 ; CHECK: [[VEC2:%.*]] = insertelement <3 x double> [[VEC1]], double [[Z]], i32 2 ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 16 ; CHECK: store <3 x double> [[VEC2]], ptr [[PTR]] - %a2 = load <3 x double>, ptr addrspace(2) @a2, align 32 + %a2_gep = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 16) + %a2 = load <3 x double>, ptr addrspace(2) %a2_gep, align 32 %a2.i = getelementptr inbounds nuw i8, ptr %dst, i32 16 store <3 x double> %a2, ptr %a2.i, align 8 - ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb - ; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 2) + ; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } 
@llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 2) ; CHECK: [[X:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 4 ; CHECK: [[Y:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 5 ; CHECK: [[VEC0:%.*]] = insertelement <2 x half> poison, half [[X]], i32 0 ; CHECK: [[VEC1:%.*]] = insertelement <2 x half> [[VEC0]], half [[Y]], i32 1 ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 40 ; CHECK: store <2 x half> [[VEC1]], ptr [[PTR]] - %a3 = load <2 x half>, ptr addrspace(2) @a3, align 4 + %a3_gep = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 40) + %a3 = load <2 x half>, ptr addrspace(2) %a3_gep, align 4 %a3.i = getelementptr inbounds nuw i8, ptr %dst, i32 40 store <2 x half> %a3, ptr %a3.i, align 2 - ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb - ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 3) + ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 3) ; CHECK: [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0 ; CHECK: [[Y:%.*]] = extractvalue { i64, i64 } [[LOAD]], 1 - ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 4) + ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 4) ; CHECK: [[Z:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0 ; CHECK: [[VEC0:%.*]] = insertelement <3 x i64> poison, i64 [[X]], i32 0 ; CHECK: [[VEC1:%.*]] = insertelement <3 x i64> [[VEC0]], i64 [[Y]], i32 1 ; CHECK: [[VEC2:%.*]] = insertelement <3 x i64> [[VEC1]], i64 [[Z]], i32 2 ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 48 ; CHECK: store <3 x i64> 
[[VEC2]], ptr [[PTR]] - %a4 = load <3 x i64>, ptr addrspace(2) @a4, align 32 + %a4_gep = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 48) + %a4 = load <3 x i64>, ptr addrspace(2) %a4_gep, align 32 %a4.i = getelementptr inbounds nuw i8, ptr %dst, i32 48 store <3 x i64> %a4, ptr %a4.i, align 8 - ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb - ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 5) + ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 5) ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0 ; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1 ; CHECK: [[Z:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 2 @@ -92,12 +86,12 @@ entry: ; CHECK: [[VEC3:%.*]] = insertelement <4 x i32> [[VEC2]], i32 [[A]], i32 3 ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 72 ; CHECK: store <4 x i32> [[VEC3]], ptr [[PTR]] - %a5 = load <4 x i32>, ptr addrspace(2) @a5, align 16 + %a5_gep = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 80) + %a5 = load <4 x i32>, ptr addrspace(2) %a5_gep, align 16 %a5.i = getelementptr inbounds nuw i8, ptr %dst, i32 72 store <4 x i32> %a5, ptr %a5.i, align 4 - ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb - ; CHECK: [[LOAD:%.*]] = call { i16, i16, i16, i16, i16, i16, i16, i16 } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 6) + ; CHECK: [[LOAD:%.*]] = call { i16, i16, i16, i16, i16, i16, i16, i16 } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 6) ; CHECK: [[X:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 0 ; CHECK: [[Y:%.*]] = extractvalue { i16, i16, 
i16, i16, i16, i16, i16, i16 } [[LOAD]], 1 ; CHECK: [[Z:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 2 @@ -106,14 +100,10 @@ entry: ; CHECK: [[VEC2:%.*]] = insertelement <3 x i16> [[VEC1]], i16 [[Z]], i32 2 ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 88 ; CHECK: store <3 x i16> [[VEC2]], ptr [[PTR]] - %a6 = load <3 x i16>, ptr addrspace(2) @a6, align 8 + %a6_gep = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 96) + %a6 = load <3 x i16>, ptr addrspace(2) %a6_gep, align 8 %a6.i = getelementptr inbounds nuw i8, ptr %dst, i32 88 store <3 x i16> %a6, ptr %a6.i, align 2 ret void } - -; CHECK-NOT: !hlsl.cbs = -!hlsl.cbs = !{!0} - -!0 = !{ptr @CB.cb, ptr addrspace(2) @a1, ptr addrspace(2) @a2, ptr addrspace(2) @a3, ptr addrspace(2) @a4, ptr addrspace(2) @a5, ptr addrspace(2) @a6} diff --git a/llvm/test/CodeGen/DirectX/llc-pipeline.ll b/llvm/test/CodeGen/DirectX/llc-pipeline.ll index 13c25396ea98f..fc078a6159ac4 100644 --- a/llvm/test/CodeGen/DirectX/llc-pipeline.ll +++ b/llvm/test/CodeGen/DirectX/llc-pipeline.ll @@ -15,10 +15,10 @@ ; CHECK-NEXT: ModulePass Manager ; CHECK-NEXT: DXIL Finalize Linkage ; CHECK-NEXT: Dead Global Elimination +; CHECK-NEXT: DXIL CBuffer Access ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: DXIL Resource Access ; CHECK-NEXT: DXIL Intrinsic Expansion -; CHECK-NEXT: DXIL CBuffer Access ; CHECK-NEXT: DXIL Data Scalarization ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer.ll index 4d32e66d017c9..8f6166b7232c3 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer.ll @@ -1,5 +1,5 @@ ; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv1.6-vulkan1.3-library %s -o - | FileCheck %s -; Test that uses of cbuffer members inside ConstantExprs are handled 
correctly. +; Test that uses of cbuffer members are handled correctly. ; CHECK-DAG: OpDecorate %[[MyCBuffer:[0-9]+]] DescriptorSet 0 ; CHECK-DAG: OpDecorate %[[MyCBuffer]] Binding 0 @@ -21,7 +21,7 @@ %MyStruct = type { <4 x float> } %__cblayout_MyCBuffer = type <{ %MyStruct, <4 x float> }> -@MyCBuffer.cb = local_unnamed_addr global target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_MyCBuffer, 32, 0, 16), 2, 0) poison +@MyCBuffer.cb = local_unnamed_addr global target("spirv.VulkanBuffer", %__cblayout_MyCBuffer, 2, 0) poison @s = external hidden local_unnamed_addr addrspace(12) global %MyStruct, align 16 @v = external hidden local_unnamed_addr addrspace(12) global <4 x float>, align 16 @MyCBuffer.str = private unnamed_addr constant [10 x i8] c"MyCBuffer\00", align 1 @@ -30,17 +30,15 @@ define void @main() { entry: ; CHECK: %[[tmp:[0-9]+]] = OpCopyObject %[[wrapper_ptr_t]] %[[MyCBuffer]] - %MyCBuffer.cb_h.i.i = tail call target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_MyCBuffer, 32, 0, 16), 2, 0) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_tspirv.Layout_s___cblayout_MyCBuffers_32_0_16t_2_0t(i32 0, i32 0, i32 1, i32 0, ptr nonnull @MyCBuffer.str) - store target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_MyCBuffer, 32, 0, 16), 2, 0) %MyCBuffer.cb_h.i.i, ptr @MyCBuffer.cb, align 8 + %MyCBuffer.cb_h.i.i = tail call target("spirv.VulkanBuffer", %__cblayout_MyCBuffer, 2, 0) @llvm.spv.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr nonnull @MyCBuffer.str) + store target("spirv.VulkanBuffer", %__cblayout_MyCBuffer, 2, 0) %MyCBuffer.cb_h.i.i, ptr @MyCBuffer.cb, align 8 %0 = tail call target("spirv.Image", float, 5, 2, 0, 0, 2, 3) @llvm.spv.resource.handlefrombinding.tspirv.Image_f32_5_2_0_0_2_3t(i32 0, i32 0, i32 1, i32 0, ptr nonnull @.str) - + ; CHECK: %[[tmp_ptr:[0-9]+]] = OpAccessChain {{%[0-9]+}} %[[tmp]] %[[uint_0]] %[[uint_0]] ; CHECK: %[[v_ptr:.+]] = OpAccessChain %[[_ptr_Uniform_v4float]] %[[tmp]] 
%[[uint_0]] %[[uint_1]] ; CHECK: %[[s_ptr_gep:[0-9]+]] = OpInBoundsAccessChain %[[_ptr_Uniform_float]] %[[tmp_ptr]] %[[uint_0]] %[[uint_1]] - %gep = getelementptr inbounds %MyStruct, ptr addrspace(12) @s, i32 0, i32 0, i32 1 - ; CHECK: %[[s_val:.+]] = OpLoad %[[float]] %[[s_ptr_gep]] - %load_from_gep = load float, ptr addrspace(12) %gep, align 4 + %load_from_gep = load float, ptr addrspace(12) getelementptr inbounds (%MyStruct, ptr addrspace(12) @s, i32 0, i32 0, i32 1), align 4 ; CHECK: %[[v_val:.+]] = OpLoad %[[v4float]] %[[v_ptr]] %load_v = load <4 x float>, ptr addrspace(12) @v, align 16 diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer_unused.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer_unused.ll index c365452a9b404..670548d3d3e27 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer_unused.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer_unused.ll @@ -14,12 +14,12 @@ %__cblayout_PartiallyUsedCBuffer = type <{ float, i32 }> %__cblayout_AnotherCBuffer = type <{ <4 x float>, <4 x float> }> -@UnusedCBuffer.cb = local_unnamed_addr global target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_UnusedCBuffer, 4, 0), 2, 0) poison +@UnusedCBuffer.cb = local_unnamed_addr global target("spirv.VulkanBuffer", %__cblayout_UnusedCBuffer, 2, 0) poison @UnusedCBuffer.str = private unnamed_addr constant [14 x i8] c"UnusedCBuffer\00", align 1 -@PartiallyUsedCBuffer.cb = local_unnamed_addr global target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_PartiallyUsedCBuffer, 8, 0, 4), 2, 0) poison +@PartiallyUsedCBuffer.cb = local_unnamed_addr global target("spirv.VulkanBuffer", %__cblayout_PartiallyUsedCBuffer, 2, 0) poison @used_member = external hidden local_unnamed_addr addrspace(12) global float, align 4 @PartiallyUsedCBuffer.str = private unnamed_addr constant [21 x i8] c"PartiallyUsedCBuffer\00", align 1 -@AnotherCBuffer.cb = local_unnamed_addr global target("spirv.VulkanBuffer", target("spirv.Layout", 
%__cblayout_AnotherCBuffer, 32, 0, 16), 2, 0) poison +@AnotherCBuffer.cb = local_unnamed_addr global target("spirv.VulkanBuffer", %__cblayout_AnotherCBuffer, 2, 0) poison @a = external hidden local_unnamed_addr addrspace(12) global <4 x float>, align 16 @AnotherCBuffer.str = private unnamed_addr constant [15 x i8] c"AnotherCBuffer\00", align 1 @.str = private unnamed_addr constant [7 x i8] c"output\00", align 1 @@ -28,18 +28,18 @@ ; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(readwrite, argmem: write, inaccessiblemem: none) define void @main() local_unnamed_addr #1 { entry: - %UnusedCBuffer.cb_h.i.i = tail call target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_UnusedCBuffer, 4, 0), 2, 0) @llvm.spv.resource.handlefromimplicitbinding.tspirv.VulkanBuffer_tspirv.Layout_s___cblayout_UnusedCBuffers_4_0t_2_0t(i32 0, i32 0, i32 1, i32 0, ptr nonnull @UnusedCBuffer.str) - store target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_UnusedCBuffer, 4, 0), 2, 0) %UnusedCBuffer.cb_h.i.i, ptr @UnusedCBuffer.cb, align 8 + %UnusedCBuffer.cb_h.i.i = tail call target("spirv.VulkanBuffer", %__cblayout_UnusedCBuffer, 2, 0) @llvm.spv.resource.handlefromimplicitbinding(i32 0, i32 0, i32 1, i32 0, ptr nonnull @UnusedCBuffer.str) + store target("spirv.VulkanBuffer", %__cblayout_UnusedCBuffer, 2, 0) %UnusedCBuffer.cb_h.i.i, ptr @UnusedCBuffer.cb, align 8 ; CHECK: %[[tmp:[0-9]+]] = OpCopyObject {{%[0-9]+}} %[[PartiallyUsedCBuffer]] ; CHECK: %[[used_member_ptr:.+]] = OpAccessChain %{{.+}} %[[tmp]] %{{.+}} %[[uint_0:[0-9]+]] - %PartiallyUsedCBuffer.cb_h.i.i = tail call target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_PartiallyUsedCBuffer, 8, 0, 4), 2, 0) @llvm.spv.resource.handlefromimplicitbinding.tspirv.VulkanBuffer_tspirv.Layout_s___cblayout_PartiallyUsedCBuffers_8_0_4t_2_0t(i32 1, i32 0, i32 1, i32 0, ptr nonnull @PartiallyUsedCBuffer.str) - store target("spirv.VulkanBuffer", target("spirv.Layout", 
%__cblayout_PartiallyUsedCBuffer, 8, 0, 4), 2, 0) %PartiallyUsedCBuffer.cb_h.i.i, ptr @PartiallyUsedCBuffer.cb, align 8 + %PartiallyUsedCBuffer.cb_h.i.i = tail call target("spirv.VulkanBuffer", %__cblayout_PartiallyUsedCBuffer, 2, 0) @llvm.spv.resource.handlefromimplicitbinding(i32 1, i32 0, i32 1, i32 0, ptr nonnull @PartiallyUsedCBuffer.str) + store target("spirv.VulkanBuffer", %__cblayout_PartiallyUsedCBuffer, 2, 0) %PartiallyUsedCBuffer.cb_h.i.i, ptr @PartiallyUsedCBuffer.cb, align 8 ; CHECK: %[[tmp:[0-9]+]] = OpCopyObject {{%[0-9]+}} %[[AnotherCBuffer]] ; CHECK: %[[a_ptr:.+]] = OpAccessChain %{{.+}} %[[tmp]] %{{.+}} %[[uint_0]] - %AnotherCBuffer.cb_h.i.i = tail call target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_AnotherCBuffer, 32, 0, 16), 2, 0) @llvm.spv.resource.handlefromimplicitbinding.tspirv.VulkanBuffer_tspirv.Layout_s___cblayout_AnotherCBuffers_32_0_16t_2_0t(i32 2, i32 0, i32 1, i32 0, ptr nonnull @AnotherCBuffer.str) - store target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_AnotherCBuffer, 32, 0, 16), 2, 0) %AnotherCBuffer.cb_h.i.i, ptr @AnotherCBuffer.cb, align 8 + %AnotherCBuffer.cb_h.i.i = tail call target("spirv.VulkanBuffer", %__cblayout_AnotherCBuffer, 2, 0) @llvm.spv.resource.handlefromimplicitbinding(i32 2, i32 0, i32 1, i32 0, ptr nonnull @AnotherCBuffer.str) + store target("spirv.VulkanBuffer", %__cblayout_AnotherCBuffer, 2, 0) %AnotherCBuffer.cb_h.i.i, ptr @AnotherCBuffer.cb, align 8 %0 = tail call target("spirv.Image", float, 5, 2, 0, 0, 2, 1) @llvm.spv.resource.handlefromimplicitbinding.tspirv.Image_f32_5_2_0_0_2_1t(i32 3, i32 0, i32 1, i32 0, ptr nonnull @.str) %2 = load float, ptr addrspace(12) @used_member, align 4