@@ -291,3 +291,209 @@ define ptr @fold_mul24_into_mad(ptr %base, i64 %a, i64 %b) {
291291 %gep = getelementptr inbounds i8 , ptr %base , i64 %mul
292292 ret ptr %gep
293293}
294+
295+ ; Test PTRADD handling in AMDGPUDAGToDAGISel::SelectGlobalSAddr: store to a kernel-argument base plus a zero-extended offset derived from the workitem id, plus a constant 16.
296+ define amdgpu_kernel void @uniform_base_varying_offset_imm (ptr addrspace (1 ) %p ) {
297+ ; GFX942_PTRADD-LABEL: uniform_base_varying_offset_imm:
298+ ; GFX942_PTRADD: ; %bb.0: ; %entry
299+ ; GFX942_PTRADD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
300+ ; GFX942_PTRADD-NEXT: v_and_b32_e32 v0, 0x3ff, v0
301+ ; GFX942_PTRADD-NEXT: v_mov_b32_e32 v1, 0
302+ ; GFX942_PTRADD-NEXT: v_lshlrev_b32_e32 v0, 2, v0
303+ ; GFX942_PTRADD-NEXT: v_mov_b32_e32 v2, 1
304+ ; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
305+ ; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
306+ ; GFX942_PTRADD-NEXT: global_store_dword v[0:1], v2, off offset:16
307+ ; GFX942_PTRADD-NEXT: s_endpgm
308+ ;
309+ ; GFX942_LEGACY-LABEL: uniform_base_varying_offset_imm:
310+ ; GFX942_LEGACY: ; %bb.0: ; %entry
311+ ; GFX942_LEGACY-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
312+ ; GFX942_LEGACY-NEXT: v_and_b32_e32 v0, 0x3ff, v0
313+ ; GFX942_LEGACY-NEXT: v_lshlrev_b32_e32 v0, 2, v0
314+ ; GFX942_LEGACY-NEXT: v_mov_b32_e32 v1, 1
315+ ; GFX942_LEGACY-NEXT: s_waitcnt lgkmcnt(0)
316+ ; GFX942_LEGACY-NEXT: global_store_dword v0, v1, s[0:1] offset:16
317+ ; GFX942_LEGACY-NEXT: s_endpgm
318+ entry:
319+ %tid = call i32 @llvm.amdgcn.workitem.id.x ()
320+ %shift = shl i32 %tid , 2
321+ %voffset = zext i32 %shift to i64
322+ %gep1 = getelementptr inbounds i8 , ptr addrspace (1 ) %p , i64 %voffset
323+ %gep2 = getelementptr inbounds i8 , ptr addrspace (1 ) %gep1 , i64 16
324+ store i32 1 , ptr addrspace (1 ) %gep2
325+ ret void
326+ }
327+
328+ ; Adjusted from global-saddr-load.ll. Tests PTRADD handling in
329+ ; AMDGPUDAGToDAGISel::SelectSMRDBaseOffset: load from a kernel-argument pointer offset by a zero-extended i32 kernel argument.
330+ define amdgpu_kernel void @global_load_saddr_i32_uniform_offset (ptr addrspace (1 ) %sbase , i32 %soffset , ptr addrspace (1 ) %r ) {
331+ ; GFX942_PTRADD-LABEL: global_load_saddr_i32_uniform_offset:
332+ ; GFX942_PTRADD: ; %bb.0:
333+ ; GFX942_PTRADD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
334+ ; GFX942_PTRADD-NEXT: s_load_dword s6, s[4:5], 0x8
335+ ; GFX942_PTRADD-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x10
336+ ; GFX942_PTRADD-NEXT: v_mov_b32_e32 v0, 0
337+ ; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
338+ ; GFX942_PTRADD-NEXT: s_add_u32 s0, s0, s6
339+ ; GFX942_PTRADD-NEXT: s_addc_u32 s1, s1, 0
340+ ; GFX942_PTRADD-NEXT: s_load_dword s0, s[0:1], 0x0
341+ ; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
342+ ; GFX942_PTRADD-NEXT: v_mov_b32_e32 v1, s0
343+ ; GFX942_PTRADD-NEXT: global_store_dword v0, v1, s[2:3]
344+ ; GFX942_PTRADD-NEXT: s_endpgm
345+ ;
346+ ; GFX942_LEGACY-LABEL: global_load_saddr_i32_uniform_offset:
347+ ; GFX942_LEGACY: ; %bb.0:
348+ ; GFX942_LEGACY-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
349+ ; GFX942_LEGACY-NEXT: s_load_dword s6, s[4:5], 0x8
350+ ; GFX942_LEGACY-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x10
351+ ; GFX942_LEGACY-NEXT: v_mov_b32_e32 v0, 0
352+ ; GFX942_LEGACY-NEXT: s_waitcnt lgkmcnt(0)
353+ ; GFX942_LEGACY-NEXT: s_load_dword s0, s[0:1], s6 offset:0x0
354+ ; GFX942_LEGACY-NEXT: s_waitcnt lgkmcnt(0)
355+ ; GFX942_LEGACY-NEXT: v_mov_b32_e32 v1, s0
356+ ; GFX942_LEGACY-NEXT: global_store_dword v0, v1, s[2:3]
357+ ; GFX942_LEGACY-NEXT: s_endpgm
358+ %zext.offset = zext i32 %soffset to i64
359+ %gep0 = getelementptr inbounds i8 , ptr addrspace (1 ) %sbase , i64 %zext.offset
360+ %load = load i32 , ptr addrspace (1 ) %gep0
361+ %to.vgpr = bitcast i32 %load to float
362+ store float %to.vgpr , ptr addrspace (1 ) %r
363+ ret void
364+ }
365+
366+ ; Adjusted from llvm.amdgcn.global.load.lds.ll, tests the offset lowering for
367+ ; Intrinsic::amdgcn_global_load_lds when the source is an inreg base pointer plus a zero-extended i32 offset.
368+ define void @global_load_lds_dword_saddr_and_vaddr (ptr addrspace (1 ) nocapture inreg %gptr , ptr addrspace (3 ) nocapture %lptr , i32 %voffset ) {
369+ ; GFX942_PTRADD-LABEL: global_load_lds_dword_saddr_and_vaddr:
370+ ; GFX942_PTRADD: ; %bb.0: ; %main_body
371+ ; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
372+ ; GFX942_PTRADD-NEXT: v_mov_b32_e32 v2, v1
373+ ; GFX942_PTRADD-NEXT: v_mov_b32_e32 v3, 0
374+ ; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[2:3], s[0:1], 0, v[2:3]
375+ ; GFX942_PTRADD-NEXT: v_readfirstlane_b32 s0, v0
376+ ; GFX942_PTRADD-NEXT: s_mov_b32 m0, s0
377+ ; GFX942_PTRADD-NEXT: s_nop 0
378+ ; GFX942_PTRADD-NEXT: global_load_lds_dword v[2:3], off offset:48 sc1
379+ ; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
380+ ; GFX942_PTRADD-NEXT: s_setpc_b64 s[30:31]
381+ ;
382+ ; GFX942_LEGACY-LABEL: global_load_lds_dword_saddr_and_vaddr:
383+ ; GFX942_LEGACY: ; %bb.0: ; %main_body
384+ ; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
385+ ; GFX942_LEGACY-NEXT: v_readfirstlane_b32 s2, v0
386+ ; GFX942_LEGACY-NEXT: s_mov_b32 m0, s2
387+ ; GFX942_LEGACY-NEXT: s_nop 0
388+ ; GFX942_LEGACY-NEXT: global_load_lds_dword v1, s[0:1] offset:48 sc1
389+ ; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
390+ ; GFX942_LEGACY-NEXT: s_setpc_b64 s[30:31]
391+ main_body:
392+ %voffset.64 = zext i32 %voffset to i64
393+ %gep = getelementptr i8 , ptr addrspace (1 ) %gptr , i64 %voffset.64
394+ call void @llvm.amdgcn.global.load.lds (ptr addrspace (1 ) %gep , ptr addrspace (3 ) %lptr , i32 4 , i32 48 , i32 16 )
395+ ret void
396+ }
397+
398+ ; Taken from shl_add_ptr_global.ll, tests PTRADD handling in
399+ ; SITargetLowering::performSHLPtrCombine: the pointer sum is shifted left by 2 for the atomic's address and also stored unshifted (extra use).
400+ define void @shl_base_global_ptr_global_atomic_fadd (ptr addrspace (1 ) %out , ptr addrspace (1 ) %extra.use , ptr addrspace (1 ) %ptr ) {
401+ ; GFX942_PTRADD-LABEL: shl_base_global_ptr_global_atomic_fadd:
402+ ; GFX942_PTRADD: ; %bb.0:
403+ ; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
404+ ; GFX942_PTRADD-NEXT: s_mov_b64 s[0:1], 0x80
405+ ; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[0:1], v[4:5], 0, s[0:1]
406+ ; GFX942_PTRADD-NEXT: v_lshlrev_b64 v[4:5], 2, v[0:1]
407+ ; GFX942_PTRADD-NEXT: v_mov_b32_e32 v6, 0x42c80000
408+ ; GFX942_PTRADD-NEXT: global_atomic_add_f32 v[4:5], v6, off
409+ ; GFX942_PTRADD-NEXT: global_store_dwordx2 v[2:3], v[0:1], off sc0 sc1
410+ ; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0)
411+ ; GFX942_PTRADD-NEXT: s_setpc_b64 s[30:31]
412+ ;
413+ ; GFX942_LEGACY-LABEL: shl_base_global_ptr_global_atomic_fadd:
414+ ; GFX942_LEGACY: ; %bb.0:
415+ ; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
416+ ; GFX942_LEGACY-NEXT: v_lshlrev_b64 v[0:1], 2, v[4:5]
417+ ; GFX942_LEGACY-NEXT: v_mov_b32_e32 v6, 0x42c80000
418+ ; GFX942_LEGACY-NEXT: global_atomic_add_f32 v[0:1], v6, off offset:512
419+ ; GFX942_LEGACY-NEXT: s_mov_b64 s[0:1], 0x80
420+ ; GFX942_LEGACY-NEXT: v_lshl_add_u64 v[0:1], v[4:5], 0, s[0:1]
421+ ; GFX942_LEGACY-NEXT: global_store_dwordx2 v[2:3], v[0:1], off sc0 sc1
422+ ; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0)
423+ ; GFX942_LEGACY-NEXT: s_setpc_b64 s[30:31]
424+ %arrayidx0 = getelementptr inbounds [512 x i32 ], ptr addrspace (1 ) %ptr , i64 0 , i64 32
425+ %cast = ptrtoint ptr addrspace (1 ) %arrayidx0 to i64
426+ %shl = shl i64 %cast , 2
427+ %castback = inttoptr i64 %shl to ptr addrspace (1 )
428+ %unused = atomicrmw fadd ptr addrspace (1 ) %castback , float 100 .0 syncscope("agent" ) monotonic , align 4 , !amdgpu.no.fine.grained.memory !0 , !amdgpu.ignore.denormal.mode !0
429+ store volatile i64 %cast , ptr addrspace (1 ) %extra.use , align 4
430+ ret void
431+ }
432+
433+ ; Test PTRADD handling in TargetLowering::SimplifyDemandedBits and
434+ ; TargetLowering::ShrinkDemandedOp: a 64-bit GEP whose result is addrspacecast to addrspace(6) before the load.
435+ define i32 @gep_in_const_as_cast_to_const32_as (ptr addrspace (4 ) %src , i64 %offset ) {
436+ ; GFX942_PTRADD-LABEL: gep_in_const_as_cast_to_const32_as:
437+ ; GFX942_PTRADD: ; %bb.0: ; %entry
438+ ; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
439+ ; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
440+ ; GFX942_PTRADD-NEXT: s_mov_b32 s1, 0
441+ ; GFX942_PTRADD-NEXT: v_readfirstlane_b32 s0, v0
442+ ; GFX942_PTRADD-NEXT: s_load_dword s0, s[0:1], 0x0
443+ ; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
444+ ; GFX942_PTRADD-NEXT: v_mov_b32_e32 v0, s0
445+ ; GFX942_PTRADD-NEXT: s_setpc_b64 s[30:31]
446+ ;
447+ ; GFX942_LEGACY-LABEL: gep_in_const_as_cast_to_const32_as:
448+ ; GFX942_LEGACY: ; %bb.0: ; %entry
449+ ; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
450+ ; GFX942_LEGACY-NEXT: v_add_u32_e32 v0, v0, v2
451+ ; GFX942_LEGACY-NEXT: s_mov_b32 s1, 0
452+ ; GFX942_LEGACY-NEXT: v_readfirstlane_b32 s0, v0
453+ ; GFX942_LEGACY-NEXT: s_load_dword s0, s[0:1], 0x0
454+ ; GFX942_LEGACY-NEXT: s_waitcnt lgkmcnt(0)
455+ ; GFX942_LEGACY-NEXT: v_mov_b32_e32 v0, s0
456+ ; GFX942_LEGACY-NEXT: s_setpc_b64 s[30:31]
457+ entry:
458+ %gep = getelementptr i8 , ptr addrspace (4 ) %src , i64 %offset
459+ %gep.cast = addrspacecast ptr addrspace (4 ) %gep to ptr addrspace (6 )
460+ %l = load i32 , ptr addrspace (6 ) %gep.cast
461+ ret i32 %l
462+ }
463+
464+ @CG = addrspace (4 ) constant [16 x i32 ] zeroinitializer , align 4
465+
466+ ; Test PTRADD handling in isMemSrcFromConstant: an 8-byte memcpy whose source is @CG (zeroinitializer) plus 4 bytes.
467+ define void @replace_const0_memcpy_by_memset (ptr align 4 %dst ) {
468+ ; GFX942_PTRADD-LABEL: replace_const0_memcpy_by_memset:
469+ ; GFX942_PTRADD: ; %bb.0: ; %entry
470+ ; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
471+ ; GFX942_PTRADD-NEXT: s_getpc_b64 s[0:1]
472+ ; GFX942_PTRADD-NEXT: s_add_u32 s0, s0, CG@gotpcrel32@lo+4
473+ ; GFX942_PTRADD-NEXT: s_addc_u32 s1, s1, CG@gotpcrel32@hi+12
474+ ; GFX942_PTRADD-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
475+ ; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
476+ ; GFX942_PTRADD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4
477+ ; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
478+ ; GFX942_PTRADD-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
479+ ; GFX942_PTRADD-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
480+ ; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
481+ ; GFX942_PTRADD-NEXT: s_setpc_b64 s[30:31]
482+ ;
483+ ; GFX942_LEGACY-LABEL: replace_const0_memcpy_by_memset:
484+ ; GFX942_LEGACY: ; %bb.0: ; %entry
485+ ; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
486+ ; GFX942_LEGACY-NEXT: v_mov_b32_e32 v2, 0
487+ ; GFX942_LEGACY-NEXT: v_mov_b32_e32 v3, v2
488+ ; GFX942_LEGACY-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
489+ ; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
490+ ; GFX942_LEGACY-NEXT: s_setpc_b64 s[30:31]
491+ entry:
492+ %gep = getelementptr i8 , ptr addrspace (4 ) @CG , i64 4
493+ tail call void @llvm.memcpy.p0.p4.i64 (ptr noundef nonnull align 4 %dst , ptr addrspace (4 ) noundef nonnull align 4 %gep , i64 8 , i1 false )
494+ ret void
495+ }
496+
497+ declare void @llvm.memcpy.p0.p4.i64 (ptr noalias nocapture writeonly , ptr addrspace (4 ) noalias nocapture readonly , i64 , i1 immarg)
498+
499+ !0 = !{}
0 commit comments