@@ -39,7 +39,8 @@ static bool regSortCompareAfterRA(regMapBRA map1, regMapBRA map2) {
3939 return false ;
4040}
4141
42- void changeToIndirectSend (G4_INST *inst, G4_Declare *s0Var, int totalRegs, IR_Builder &builder) {
42+ void changeToIndirectSend (G4_INST *inst, G4_Declare *s0Var, int totalRegs,
43+ IR_Builder &builder, bool isLargeGRF) {
4344 // Change the send instruction to sendi
4445 G4_InstSend *Send = inst->asSendInst ();
4546 G4_SendDescRaw *desc = Send->getMsgDescRaw ();
@@ -53,8 +54,9 @@ void changeToIndirectSend(G4_INST *inst, G4_Declare *s0Var, int totalRegs, IR_Bu
5354 inst->setSrc (msgDescImm, 2 );
5455
5556 // Replace source 0 with scalar register
56- G4_SrcRegRegion *headerOpnd = builder.createSrcRegRegion (
57- Mod_src_undef, IndirGRF, s0Var->getRegVar (), 0 , 0 , builder.getRegionScalar (), Type_UB);
57+ G4_SrcRegRegion *headerOpnd =
58+ builder.createSrcRegRegion (Mod_src_undef, IndirGRF, s0Var->getRegVar (), 0 ,
59+ 0 , builder.getRegionScalar (), Type_UB);
5860 // Replace source 1 with null.
5961 G4_SrcRegRegion *payloadToUse = builder.createNullSrc (Type_UD);
6062
@@ -284,7 +286,7 @@ bool SRSubPass::replaceWithSendi(G4_BB *bb, INST_LIST_ITER instIter,
284286 bb->insertBefore (instIter, movInst);
285287 }
286288
287- changeToIndirectSend (inst, s0Var, totalRegs, builder);
289+ changeToIndirectSend (inst, s0Var, totalRegs, builder, false );
288290
289291 return true ;
290292}
@@ -431,7 +433,7 @@ void SRSubPass::SRSub(G4_BB *bb) {
431433// Check if current instruction is the candidate of sendi.
432434// Recorded as candidate.
433435bool SRSubPassAfterRA::isSRCandidateAfterRA (G4_INST *inst,
434- regCandidatesBRA &dstSrcRegs) {
436+ regCandidatesBRA &dstSrcRegs) {
435437 if (!inst->isSend ()) {
436438 return false ;
437439 }
@@ -460,7 +462,7 @@ bool SRSubPassAfterRA::isSRCandidateAfterRA(G4_INST *inst,
460462 return false ;
461463 }
462464
463- // The size of LSC src0 and src1 may not be GRF aligned.
465+ // The size of LSC src0 and src1 may not be GRF aligned.
464466 if (inst->getMsgDesc ()->getSrc1LenBytes () % builder.getGRFSize () != 0 ||
465467 inst->getMsgDesc ()->getSrc0LenBytes () % builder.getGRFSize () != 0 ) {
466468 return false ;
@@ -748,11 +750,12 @@ bool SRSubPassAfterRA::isSRCandidateAfterRA(G4_INST *inst,
748750 return true ;
749751}
750752
753+
751754// Replace the send instruction with the payload of
752755// Insert the scalar register initialization mov instructions.
753756bool SRSubPassAfterRA::replaceWithSendiAfterRA (G4_BB *bb,
754- INST_LIST_ITER instIter,
755- regCandidatesBRA &dstSrcRegs) {
757+ INST_LIST_ITER instIter,
758+ regCandidatesBRA &dstSrcRegs) {
756759 G4_INST *inst = *instIter;
757760 std::vector<G4_AddrExp *> srcs;
758761 G4_AddrExp *src;
@@ -854,21 +857,38 @@ bool SRSubPassAfterRA::replaceWithSendiAfterRA(G4_BB *bb,
854857
855858 // Initialize the scalar registers.
856859 uint16_t UQNum = totalRegs > (TypeSize (Type_UQ) / TypeSize (Type_UB)) ? 2 : 1 ;
860+ if (dstSrcRegs.isLargeGRF ) {
861+ UQNum = totalRegs > (TypeSize (Type_UQ) / TypeSize (Type_UW)) ? 2 : 1 ;
862+ }
857863 G4_Declare *s0Var = builder.createTempScalar (UQNum, " S0_" );
858864 s0Var->getRegVar ()->setPhyReg (builder.phyregpool .getScalarReg (), 0 );
859865 G4_DstRegRegion *dst =
860866 builder.createDst (s0Var->getRegVar (), 0 , 0 , 1 , Type_UQ);
861- G4_INST *movInst = builder.createIntrinsicAddrMovInst (
862- Intrinsic::PseudoAddrMov, dst, srcs[0 ], srcs[1 ], srcs[2 ], srcs[3 ],
863- srcs[4 ], srcs[5 ], srcs[6 ], srcs[7 ], false );
867+ G4_INST *movInst = nullptr ;
868+ if (!dstSrcRegs.isLargeGRF ) {
869+ movInst = builder.createIntrinsicAddrMovInst (
870+ Intrinsic::PseudoAddrMov, dst, srcs[0 ], srcs[1 ], srcs[2 ], srcs[3 ],
871+ srcs[4 ], srcs[5 ], srcs[6 ], srcs[7 ], false );
872+ } else {
873+ movInst = builder.createIntrinsicAddrMovInst (
874+ Intrinsic::PseudoAddrMovW, dst, srcs[0 ], srcs[1 ], srcs[2 ], srcs[3 ],
875+ nullptr , nullptr , nullptr , nullptr , false );
876+ }
864877 bb->insertBefore (instIter, movInst);
865878
866- if (totalRegs > 8 ) {
879+ if (UQNum > 1 ) {
867880 G4_DstRegRegion *dst1 =
868881 builder.createDst (s0Var->getRegVar (), 0 , 1 , 1 , Type_UQ);
869- G4_INST *movInst1 = builder.createIntrinsicAddrMovInst (
870- Intrinsic::PseudoAddrMov, dst1, srcs[8 ], srcs[9 ], srcs[10 ], srcs[11 ],
871- srcs[12 ], srcs[13 ], srcs[14 ], nullptr , false );
882+ G4_INST *movInst1 = nullptr ;
883+ if (!dstSrcRegs.isLargeGRF ) {
884+ movInst1 = builder.createIntrinsicAddrMovInst (
885+ Intrinsic::PseudoAddrMov, dst1, srcs[8 ], srcs[9 ], srcs[10 ], srcs[11 ],
886+ srcs[12 ], srcs[13 ], srcs[14 ], nullptr , false );
887+ } else {
888+ movInst1 = builder.createIntrinsicAddrMovInst (
889+ Intrinsic::PseudoAddrMovW, dst1, srcs[4 ], srcs[5 ], srcs[6 ], srcs[7 ],
890+ nullptr , nullptr , nullptr , nullptr , false );
891+ }
872892 bb->insertBefore (instIter, movInst1);
873893 }
874894
@@ -883,7 +903,7 @@ bool SRSubPassAfterRA::replaceWithSendiAfterRA(G4_BB *bb,
883903 }
884904 }
885905
886- changeToIndirectSend (inst, s0Var, totalRegs, builder);
906+ changeToIndirectSend (inst, s0Var, totalRegs, builder, dstSrcRegs. isLargeGRF );
887907
888908 return true ;
889909}
@@ -1005,7 +1025,8 @@ void SRSubPassAfterRA::SRSubAfterRA(G4_BB *bb) {
10051025 int srcRegLB = (*dstSrcRegsIter).opnd ->getLinearizedStart ();
10061026 int srcRegRB = (*dstSrcRegsIter).opnd ->getLinearizedEnd ();
10071027 if (!(srcRegRB < dstRegLB || srcRegLB > dstRegRB)) {
1008- // Register is redefined.
1028+
1029+ // Register is redefined
10091030 dstSrcRegsIter =
10101031 candidates[inst].dstSrcMap .erase (dstSrcRegsIter);
10111032 } else {
0 commit comments