[AMDGPU][True16] Add regbank combiner cases to fix regression around G_SEXTLOAD (#198671)
Conversation
|
@llvm/pr-subscribers-llvm-globalisel — Author: Domenic Nutile (saxlungs). Changes: Stack created with GitHub Stacks CLI (https://github.com/github/gh-stack) • Give Feedback 💬 (https://gh.io/stacks-feedback). See #195289 for previous discussion. Full diff: https://github.com/llvm/llvm-project/pull/198671.diff — 2 files affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
index 990d879c2bf09..540730162e0cb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
@@ -420,7 +420,8 @@ bool AMDGPURegBankCombinerImpl::combineD16Load(MachineInstr &MI) const {
if (mi_match(
Load, MRI,
- m_GAnd(m_MInstr(SextLoad), m_Copy(m_SpecificICst(CleanHi16))))) {
+ m_GAnd(m_MInstr(SextLoad), m_Copy(m_SpecificICst(CleanHi16)))) ||
+ mi_match(Load, MRI, m_GZExt(m_MInstr(SextLoad)))) {
if (SextLoad->getOpcode() != AMDGPU::G_SEXTLOAD)
return false;
@@ -428,6 +429,12 @@ bool AMDGPURegBankCombinerImpl::combineD16Load(MachineInstr &MI) const {
if (MMO->getSizeInBits().getValue() != 8)
return false;
+ if (Load->getOpcode() == TargetOpcode::G_ZEXT) {
+ LLT SextDstTy = MRI.getType(SextLoad->getOperand(0).getReg());
+ if (SextDstTy.getSizeInBits() != 16)
+ return false;
+ }
+
return applyD16Load(AMDGPU::G_AMDGPU_LOAD_D16_LO_I8, MI, SextLoad, Dst);
}
@@ -452,13 +459,21 @@ bool AMDGPURegBankCombinerImpl::combineD16Load(MachineInstr &MI) const {
if (mi_match(
Load, MRI,
- m_GAnd(m_MInstr(SextLoad), m_Copy(m_SpecificICst(CleanHi16))))) {
+ m_GAnd(m_MInstr(SextLoad), m_Copy(m_SpecificICst(CleanHi16)))) ||
+ mi_match(Load, MRI, m_GZExt(m_MInstr(SextLoad)))) {
if (SextLoad->getOpcode() != AMDGPU::G_SEXTLOAD)
return false;
+
const MachineMemOperand *MMO = *SextLoad->memoperands_begin();
if (MMO->getSizeInBits().getValue() != 8)
return false;
+ if (Load->getOpcode() == TargetOpcode::G_ZEXT) {
+ LLT SextDstTy = MRI.getType(SextLoad->getOperand(0).getReg());
+ if (SextDstTy.getSizeInBits() != 16)
+ return false;
+ }
+
return applyD16Load(AMDGPU::G_AMDGPU_LOAD_D16_HI_I8, MI, SextLoad, Dst);
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-zextload-s16-true16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-zextload-s16-true16.mir
index 7f8c1451b8019..4f328472d55b2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-zextload-s16-true16.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-zextload-s16-true16.mir
@@ -81,51 +81,21 @@ body: |
; GFX12-FAKE16-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; GFX12-FAKE16-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
;
- ; GFX11-TO-COMBINER-TRUE16-LABEL: name: test_sextload_global_s16_from_s8
- ; GFX11-TO-COMBINER-TRUE16: liveins: $vgpr0_vgpr1, $vgpr2
- ; GFX11-TO-COMBINER-TRUE16-NEXT: {{ $}}
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s16) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1)
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[BITCAST:%[0-9]+]]:vgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>)
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[SEXTLOAD]](s16)
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -65536
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[BITCAST]], [[COPY2]]
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[AND]], [[ZEXT]]
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; GFX11-TO-COMBINER-TRUE16-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ;
- ; GFX11-TO-COMBINER-FAKE16-LABEL: name: test_sextload_global_s16_from_s8
- ; GFX11-TO-COMBINER-FAKE16: liveins: $vgpr0_vgpr1, $vgpr2
- ; GFX11-TO-COMBINER-FAKE16-NEXT: {{ $}}
- ; GFX11-TO-COMBINER-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX11-TO-COMBINER-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
- ; GFX11-TO-COMBINER-FAKE16-NEXT: [[AMDGPU_LOAD_D16_LO_I8_:%[0-9]+]]:vgpr(<2 x s16>) = G_AMDGPU_LOAD_D16_LO_I8 [[COPY]](p1), [[COPY1]] :: (load (s8), addrspace 1)
- ; GFX11-TO-COMBINER-FAKE16-NEXT: $vgpr0 = COPY [[AMDGPU_LOAD_D16_LO_I8_]](<2 x s16>)
- ;
- ; GFX12-TO-COMBINER-TRUE16-LABEL: name: test_sextload_global_s16_from_s8
- ; GFX12-TO-COMBINER-TRUE16: liveins: $vgpr0_vgpr1, $vgpr2
- ; GFX12-TO-COMBINER-TRUE16-NEXT: {{ $}}
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s16) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1)
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[BITCAST:%[0-9]+]]:vgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>)
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[SEXTLOAD]](s16)
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -65536
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[BITCAST]], [[COPY2]]
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[AND]], [[ZEXT]]
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; GFX12-TO-COMBINER-TRUE16-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+ ; GFX11-TO-COMBINER-LABEL: name: test_sextload_global_s16_from_s8
+ ; GFX11-TO-COMBINER: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX11-TO-COMBINER-NEXT: {{ $}}
+ ; GFX11-TO-COMBINER-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+ ; GFX11-TO-COMBINER-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
+ ; GFX11-TO-COMBINER-NEXT: [[AMDGPU_LOAD_D16_LO_I8_:%[0-9]+]]:vgpr(<2 x s16>) = G_AMDGPU_LOAD_D16_LO_I8 [[COPY]](p1), [[COPY1]] :: (load (s8), addrspace 1)
+ ; GFX11-TO-COMBINER-NEXT: $vgpr0 = COPY [[AMDGPU_LOAD_D16_LO_I8_]](<2 x s16>)
;
- ; GFX12-TO-COMBINER-FAKE16-LABEL: name: test_sextload_global_s16_from_s8
- ; GFX12-TO-COMBINER-FAKE16: liveins: $vgpr0_vgpr1, $vgpr2
- ; GFX12-TO-COMBINER-FAKE16-NEXT: {{ $}}
- ; GFX12-TO-COMBINER-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX12-TO-COMBINER-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
- ; GFX12-TO-COMBINER-FAKE16-NEXT: [[AMDGPU_LOAD_D16_LO_I8_:%[0-9]+]]:vgpr(<2 x s16>) = G_AMDGPU_LOAD_D16_LO_I8 [[COPY]](p1), [[COPY1]] :: (load (s8), addrspace 1)
- ; GFX12-TO-COMBINER-FAKE16-NEXT: $vgpr0 = COPY [[AMDGPU_LOAD_D16_LO_I8_]](<2 x s16>)
+ ; GFX12-TO-COMBINER-LABEL: name: test_sextload_global_s16_from_s8
+ ; GFX12-TO-COMBINER: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX12-TO-COMBINER-NEXT: {{ $}}
+ ; GFX12-TO-COMBINER-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+ ; GFX12-TO-COMBINER-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
+ ; GFX12-TO-COMBINER-NEXT: [[AMDGPU_LOAD_D16_LO_I8_:%[0-9]+]]:vgpr(<2 x s16>) = G_AMDGPU_LOAD_D16_LO_I8 [[COPY]](p1), [[COPY1]] :: (load (s8), addrspace 1)
+ ; GFX12-TO-COMBINER-NEXT: $vgpr0 = COPY [[AMDGPU_LOAD_D16_LO_I8_]](<2 x s16>)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<2 x s16>) = COPY $vgpr2
%2:_(s16) = G_SEXTLOAD %0 :: (load (s8), addrspace 1)
@@ -206,51 +176,21 @@ body: |
; GFX12-FAKE16-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; GFX12-FAKE16-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
;
- ; GFX11-TO-COMBINER-TRUE16-LABEL: name: test_sextload_local_s16_from_s8
- ; GFX11-TO-COMBINER-TRUE16: liveins: $vgpr0_vgpr1, $vgpr2
- ; GFX11-TO-COMBINER-TRUE16-NEXT: {{ $}}
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s16) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 3)
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[BITCAST:%[0-9]+]]:vgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>)
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[SEXTLOAD]](s16)
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -65536
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[BITCAST]], [[COPY2]]
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[AND]], [[ZEXT]]
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; GFX11-TO-COMBINER-TRUE16-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ;
- ; GFX11-TO-COMBINER-FAKE16-LABEL: name: test_sextload_local_s16_from_s8
- ; GFX11-TO-COMBINER-FAKE16: liveins: $vgpr0_vgpr1, $vgpr2
- ; GFX11-TO-COMBINER-FAKE16-NEXT: {{ $}}
- ; GFX11-TO-COMBINER-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX11-TO-COMBINER-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
- ; GFX11-TO-COMBINER-FAKE16-NEXT: [[AMDGPU_LOAD_D16_LO_I8_:%[0-9]+]]:vgpr(<2 x s16>) = G_AMDGPU_LOAD_D16_LO_I8 [[COPY]](p1), [[COPY1]] :: (load (s8), addrspace 3)
- ; GFX11-TO-COMBINER-FAKE16-NEXT: $vgpr0 = COPY [[AMDGPU_LOAD_D16_LO_I8_]](<2 x s16>)
- ;
- ; GFX12-TO-COMBINER-TRUE16-LABEL: name: test_sextload_local_s16_from_s8
- ; GFX12-TO-COMBINER-TRUE16: liveins: $vgpr0_vgpr1, $vgpr2
- ; GFX12-TO-COMBINER-TRUE16-NEXT: {{ $}}
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s16) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 3)
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[BITCAST:%[0-9]+]]:vgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>)
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[SEXTLOAD]](s16)
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -65536
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[BITCAST]], [[COPY2]]
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[AND]], [[ZEXT]]
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; GFX12-TO-COMBINER-TRUE16-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+ ; GFX11-TO-COMBINER-LABEL: name: test_sextload_local_s16_from_s8
+ ; GFX11-TO-COMBINER: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX11-TO-COMBINER-NEXT: {{ $}}
+ ; GFX11-TO-COMBINER-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+ ; GFX11-TO-COMBINER-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
+ ; GFX11-TO-COMBINER-NEXT: [[AMDGPU_LOAD_D16_LO_I8_:%[0-9]+]]:vgpr(<2 x s16>) = G_AMDGPU_LOAD_D16_LO_I8 [[COPY]](p1), [[COPY1]] :: (load (s8), addrspace 3)
+ ; GFX11-TO-COMBINER-NEXT: $vgpr0 = COPY [[AMDGPU_LOAD_D16_LO_I8_]](<2 x s16>)
;
- ; GFX12-TO-COMBINER-FAKE16-LABEL: name: test_sextload_local_s16_from_s8
- ; GFX12-TO-COMBINER-FAKE16: liveins: $vgpr0_vgpr1, $vgpr2
- ; GFX12-TO-COMBINER-FAKE16-NEXT: {{ $}}
- ; GFX12-TO-COMBINER-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX12-TO-COMBINER-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
- ; GFX12-TO-COMBINER-FAKE16-NEXT: [[AMDGPU_LOAD_D16_LO_I8_:%[0-9]+]]:vgpr(<2 x s16>) = G_AMDGPU_LOAD_D16_LO_I8 [[COPY]](p1), [[COPY1]] :: (load (s8), addrspace 3)
- ; GFX12-TO-COMBINER-FAKE16-NEXT: $vgpr0 = COPY [[AMDGPU_LOAD_D16_LO_I8_]](<2 x s16>)
+ ; GFX12-TO-COMBINER-LABEL: name: test_sextload_local_s16_from_s8
+ ; GFX12-TO-COMBINER: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX12-TO-COMBINER-NEXT: {{ $}}
+ ; GFX12-TO-COMBINER-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+ ; GFX12-TO-COMBINER-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
+ ; GFX12-TO-COMBINER-NEXT: [[AMDGPU_LOAD_D16_LO_I8_:%[0-9]+]]:vgpr(<2 x s16>) = G_AMDGPU_LOAD_D16_LO_I8 [[COPY]](p1), [[COPY1]] :: (load (s8), addrspace 3)
+ ; GFX12-TO-COMBINER-NEXT: $vgpr0 = COPY [[AMDGPU_LOAD_D16_LO_I8_]](<2 x s16>)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<2 x s16>) = COPY $vgpr2
%2:_(s16) = G_SEXTLOAD %0 :: (load (s8), addrspace 3)
@@ -331,51 +271,21 @@ body: |
; GFX12-FAKE16-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; GFX12-FAKE16-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
;
- ; GFX11-TO-COMBINER-TRUE16-LABEL: name: test_sextload_private_s16_from_s8
- ; GFX11-TO-COMBINER-TRUE16: liveins: $vgpr0_vgpr1, $vgpr2
- ; GFX11-TO-COMBINER-TRUE16-NEXT: {{ $}}
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s16) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 5)
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[BITCAST:%[0-9]+]]:vgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>)
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[SEXTLOAD]](s16)
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -65536
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[BITCAST]], [[COPY2]]
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[AND]], [[ZEXT]]
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; GFX11-TO-COMBINER-TRUE16-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ;
- ; GFX11-TO-COMBINER-FAKE16-LABEL: name: test_sextload_private_s16_from_s8
- ; GFX11-TO-COMBINER-FAKE16: liveins: $vgpr0_vgpr1, $vgpr2
- ; GFX11-TO-COMBINER-FAKE16-NEXT: {{ $}}
- ; GFX11-TO-COMBINER-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX11-TO-COMBINER-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
- ; GFX11-TO-COMBINER-FAKE16-NEXT: [[AMDGPU_LOAD_D16_LO_I8_:%[0-9]+]]:vgpr(<2 x s16>) = G_AMDGPU_LOAD_D16_LO_I8 [[COPY]](p1), [[COPY1]] :: (load (s8), addrspace 5)
- ; GFX11-TO-COMBINER-FAKE16-NEXT: $vgpr0 = COPY [[AMDGPU_LOAD_D16_LO_I8_]](<2 x s16>)
- ;
- ; GFX12-TO-COMBINER-TRUE16-LABEL: name: test_sextload_private_s16_from_s8
- ; GFX12-TO-COMBINER-TRUE16: liveins: $vgpr0_vgpr1, $vgpr2
- ; GFX12-TO-COMBINER-TRUE16-NEXT: {{ $}}
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s16) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 5)
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[BITCAST:%[0-9]+]]:vgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>)
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[SEXTLOAD]](s16)
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -65536
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[BITCAST]], [[COPY2]]
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[AND]], [[ZEXT]]
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; GFX12-TO-COMBINER-TRUE16-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+ ; GFX11-TO-COMBINER-LABEL: name: test_sextload_private_s16_from_s8
+ ; GFX11-TO-COMBINER: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX11-TO-COMBINER-NEXT: {{ $}}
+ ; GFX11-TO-COMBINER-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+ ; GFX11-TO-COMBINER-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
+ ; GFX11-TO-COMBINER-NEXT: [[AMDGPU_LOAD_D16_LO_I8_:%[0-9]+]]:vgpr(<2 x s16>) = G_AMDGPU_LOAD_D16_LO_I8 [[COPY]](p1), [[COPY1]] :: (load (s8), addrspace 5)
+ ; GFX11-TO-COMBINER-NEXT: $vgpr0 = COPY [[AMDGPU_LOAD_D16_LO_I8_]](<2 x s16>)
;
- ; GFX12-TO-COMBINER-FAKE16-LABEL: name: test_sextload_private_s16_from_s8
- ; GFX12-TO-COMBINER-FAKE16: liveins: $vgpr0_vgpr1, $vgpr2
- ; GFX12-TO-COMBINER-FAKE16-NEXT: {{ $}}
- ; GFX12-TO-COMBINER-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX12-TO-COMBINER-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
- ; GFX12-TO-COMBINER-FAKE16-NEXT: [[AMDGPU_LOAD_D16_LO_I8_:%[0-9]+]]:vgpr(<2 x s16>) = G_AMDGPU_LOAD_D16_LO_I8 [[COPY]](p1), [[COPY1]] :: (load (s8), addrspace 5)
- ; GFX12-TO-COMBINER-FAKE16-NEXT: $vgpr0 = COPY [[AMDGPU_LOAD_D16_LO_I8_]](<2 x s16>)
+ ; GFX12-TO-COMBINER-LABEL: name: test_sextload_private_s16_from_s8
+ ; GFX12-TO-COMBINER: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX12-TO-COMBINER-NEXT: {{ $}}
+ ; GFX12-TO-COMBINER-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+ ; GFX12-TO-COMBINER-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
+ ; GFX12-TO-COMBINER-NEXT: [[AMDGPU_LOAD_D16_LO_I8_:%[0-9]+]]:vgpr(<2 x s16>) = G_AMDGPU_LOAD_D16_LO_I8 [[COPY]](p1), [[COPY1]] :: (load (s8), addrspace 5)
+ ; GFX12-TO-COMBINER-NEXT: $vgpr0 = COPY [[AMDGPU_LOAD_D16_LO_I8_]](<2 x s16>)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<2 x s16>) = COPY $vgpr2
%2:_(s16) = G_SEXTLOAD %0 :: (load (s8), addrspace 5)
|
|
@llvm/pr-subscribers-backend-amdgpu — Author: Domenic Nutile (saxlungs). Changes: Stack created with GitHub Stacks CLI (https://github.com/github/gh-stack) • Give Feedback 💬 (https://gh.io/stacks-feedback). See #195289 for previous discussion. Full diff: https://github.com/llvm/llvm-project/pull/198671.diff — 2 files affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
index 990d879c2bf09..540730162e0cb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
@@ -420,7 +420,8 @@ bool AMDGPURegBankCombinerImpl::combineD16Load(MachineInstr &MI) const {
if (mi_match(
Load, MRI,
- m_GAnd(m_MInstr(SextLoad), m_Copy(m_SpecificICst(CleanHi16))))) {
+ m_GAnd(m_MInstr(SextLoad), m_Copy(m_SpecificICst(CleanHi16)))) ||
+ mi_match(Load, MRI, m_GZExt(m_MInstr(SextLoad)))) {
if (SextLoad->getOpcode() != AMDGPU::G_SEXTLOAD)
return false;
@@ -428,6 +429,12 @@ bool AMDGPURegBankCombinerImpl::combineD16Load(MachineInstr &MI) const {
if (MMO->getSizeInBits().getValue() != 8)
return false;
+ if (Load->getOpcode() == TargetOpcode::G_ZEXT) {
+ LLT SextDstTy = MRI.getType(SextLoad->getOperand(0).getReg());
+ if (SextDstTy.getSizeInBits() != 16)
+ return false;
+ }
+
return applyD16Load(AMDGPU::G_AMDGPU_LOAD_D16_LO_I8, MI, SextLoad, Dst);
}
@@ -452,13 +459,21 @@ bool AMDGPURegBankCombinerImpl::combineD16Load(MachineInstr &MI) const {
if (mi_match(
Load, MRI,
- m_GAnd(m_MInstr(SextLoad), m_Copy(m_SpecificICst(CleanHi16))))) {
+ m_GAnd(m_MInstr(SextLoad), m_Copy(m_SpecificICst(CleanHi16)))) ||
+ mi_match(Load, MRI, m_GZExt(m_MInstr(SextLoad)))) {
if (SextLoad->getOpcode() != AMDGPU::G_SEXTLOAD)
return false;
+
const MachineMemOperand *MMO = *SextLoad->memoperands_begin();
if (MMO->getSizeInBits().getValue() != 8)
return false;
+ if (Load->getOpcode() == TargetOpcode::G_ZEXT) {
+ LLT SextDstTy = MRI.getType(SextLoad->getOperand(0).getReg());
+ if (SextDstTy.getSizeInBits() != 16)
+ return false;
+ }
+
return applyD16Load(AMDGPU::G_AMDGPU_LOAD_D16_HI_I8, MI, SextLoad, Dst);
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-zextload-s16-true16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-zextload-s16-true16.mir
index 7f8c1451b8019..4f328472d55b2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-zextload-s16-true16.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-zextload-s16-true16.mir
@@ -81,51 +81,21 @@ body: |
; GFX12-FAKE16-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; GFX12-FAKE16-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
;
- ; GFX11-TO-COMBINER-TRUE16-LABEL: name: test_sextload_global_s16_from_s8
- ; GFX11-TO-COMBINER-TRUE16: liveins: $vgpr0_vgpr1, $vgpr2
- ; GFX11-TO-COMBINER-TRUE16-NEXT: {{ $}}
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s16) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1)
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[BITCAST:%[0-9]+]]:vgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>)
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[SEXTLOAD]](s16)
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -65536
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[BITCAST]], [[COPY2]]
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[AND]], [[ZEXT]]
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; GFX11-TO-COMBINER-TRUE16-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ;
- ; GFX11-TO-COMBINER-FAKE16-LABEL: name: test_sextload_global_s16_from_s8
- ; GFX11-TO-COMBINER-FAKE16: liveins: $vgpr0_vgpr1, $vgpr2
- ; GFX11-TO-COMBINER-FAKE16-NEXT: {{ $}}
- ; GFX11-TO-COMBINER-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX11-TO-COMBINER-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
- ; GFX11-TO-COMBINER-FAKE16-NEXT: [[AMDGPU_LOAD_D16_LO_I8_:%[0-9]+]]:vgpr(<2 x s16>) = G_AMDGPU_LOAD_D16_LO_I8 [[COPY]](p1), [[COPY1]] :: (load (s8), addrspace 1)
- ; GFX11-TO-COMBINER-FAKE16-NEXT: $vgpr0 = COPY [[AMDGPU_LOAD_D16_LO_I8_]](<2 x s16>)
- ;
- ; GFX12-TO-COMBINER-TRUE16-LABEL: name: test_sextload_global_s16_from_s8
- ; GFX12-TO-COMBINER-TRUE16: liveins: $vgpr0_vgpr1, $vgpr2
- ; GFX12-TO-COMBINER-TRUE16-NEXT: {{ $}}
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s16) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1)
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[BITCAST:%[0-9]+]]:vgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>)
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[SEXTLOAD]](s16)
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -65536
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[BITCAST]], [[COPY2]]
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[AND]], [[ZEXT]]
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; GFX12-TO-COMBINER-TRUE16-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+ ; GFX11-TO-COMBINER-LABEL: name: test_sextload_global_s16_from_s8
+ ; GFX11-TO-COMBINER: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX11-TO-COMBINER-NEXT: {{ $}}
+ ; GFX11-TO-COMBINER-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+ ; GFX11-TO-COMBINER-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
+ ; GFX11-TO-COMBINER-NEXT: [[AMDGPU_LOAD_D16_LO_I8_:%[0-9]+]]:vgpr(<2 x s16>) = G_AMDGPU_LOAD_D16_LO_I8 [[COPY]](p1), [[COPY1]] :: (load (s8), addrspace 1)
+ ; GFX11-TO-COMBINER-NEXT: $vgpr0 = COPY [[AMDGPU_LOAD_D16_LO_I8_]](<2 x s16>)
;
- ; GFX12-TO-COMBINER-FAKE16-LABEL: name: test_sextload_global_s16_from_s8
- ; GFX12-TO-COMBINER-FAKE16: liveins: $vgpr0_vgpr1, $vgpr2
- ; GFX12-TO-COMBINER-FAKE16-NEXT: {{ $}}
- ; GFX12-TO-COMBINER-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX12-TO-COMBINER-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
- ; GFX12-TO-COMBINER-FAKE16-NEXT: [[AMDGPU_LOAD_D16_LO_I8_:%[0-9]+]]:vgpr(<2 x s16>) = G_AMDGPU_LOAD_D16_LO_I8 [[COPY]](p1), [[COPY1]] :: (load (s8), addrspace 1)
- ; GFX12-TO-COMBINER-FAKE16-NEXT: $vgpr0 = COPY [[AMDGPU_LOAD_D16_LO_I8_]](<2 x s16>)
+ ; GFX12-TO-COMBINER-LABEL: name: test_sextload_global_s16_from_s8
+ ; GFX12-TO-COMBINER: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX12-TO-COMBINER-NEXT: {{ $}}
+ ; GFX12-TO-COMBINER-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+ ; GFX12-TO-COMBINER-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
+ ; GFX12-TO-COMBINER-NEXT: [[AMDGPU_LOAD_D16_LO_I8_:%[0-9]+]]:vgpr(<2 x s16>) = G_AMDGPU_LOAD_D16_LO_I8 [[COPY]](p1), [[COPY1]] :: (load (s8), addrspace 1)
+ ; GFX12-TO-COMBINER-NEXT: $vgpr0 = COPY [[AMDGPU_LOAD_D16_LO_I8_]](<2 x s16>)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<2 x s16>) = COPY $vgpr2
%2:_(s16) = G_SEXTLOAD %0 :: (load (s8), addrspace 1)
@@ -206,51 +176,21 @@ body: |
; GFX12-FAKE16-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; GFX12-FAKE16-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
;
- ; GFX11-TO-COMBINER-TRUE16-LABEL: name: test_sextload_local_s16_from_s8
- ; GFX11-TO-COMBINER-TRUE16: liveins: $vgpr0_vgpr1, $vgpr2
- ; GFX11-TO-COMBINER-TRUE16-NEXT: {{ $}}
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s16) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 3)
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[BITCAST:%[0-9]+]]:vgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>)
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[SEXTLOAD]](s16)
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -65536
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[BITCAST]], [[COPY2]]
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[AND]], [[ZEXT]]
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; GFX11-TO-COMBINER-TRUE16-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ;
- ; GFX11-TO-COMBINER-FAKE16-LABEL: name: test_sextload_local_s16_from_s8
- ; GFX11-TO-COMBINER-FAKE16: liveins: $vgpr0_vgpr1, $vgpr2
- ; GFX11-TO-COMBINER-FAKE16-NEXT: {{ $}}
- ; GFX11-TO-COMBINER-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX11-TO-COMBINER-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
- ; GFX11-TO-COMBINER-FAKE16-NEXT: [[AMDGPU_LOAD_D16_LO_I8_:%[0-9]+]]:vgpr(<2 x s16>) = G_AMDGPU_LOAD_D16_LO_I8 [[COPY]](p1), [[COPY1]] :: (load (s8), addrspace 3)
- ; GFX11-TO-COMBINER-FAKE16-NEXT: $vgpr0 = COPY [[AMDGPU_LOAD_D16_LO_I8_]](<2 x s16>)
- ;
- ; GFX12-TO-COMBINER-TRUE16-LABEL: name: test_sextload_local_s16_from_s8
- ; GFX12-TO-COMBINER-TRUE16: liveins: $vgpr0_vgpr1, $vgpr2
- ; GFX12-TO-COMBINER-TRUE16-NEXT: {{ $}}
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s16) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 3)
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[BITCAST:%[0-9]+]]:vgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>)
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[SEXTLOAD]](s16)
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -65536
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[BITCAST]], [[COPY2]]
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[AND]], [[ZEXT]]
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; GFX12-TO-COMBINER-TRUE16-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+ ; GFX11-TO-COMBINER-LABEL: name: test_sextload_local_s16_from_s8
+ ; GFX11-TO-COMBINER: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX11-TO-COMBINER-NEXT: {{ $}}
+ ; GFX11-TO-COMBINER-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+ ; GFX11-TO-COMBINER-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
+ ; GFX11-TO-COMBINER-NEXT: [[AMDGPU_LOAD_D16_LO_I8_:%[0-9]+]]:vgpr(<2 x s16>) = G_AMDGPU_LOAD_D16_LO_I8 [[COPY]](p1), [[COPY1]] :: (load (s8), addrspace 3)
+ ; GFX11-TO-COMBINER-NEXT: $vgpr0 = COPY [[AMDGPU_LOAD_D16_LO_I8_]](<2 x s16>)
;
- ; GFX12-TO-COMBINER-FAKE16-LABEL: name: test_sextload_local_s16_from_s8
- ; GFX12-TO-COMBINER-FAKE16: liveins: $vgpr0_vgpr1, $vgpr2
- ; GFX12-TO-COMBINER-FAKE16-NEXT: {{ $}}
- ; GFX12-TO-COMBINER-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX12-TO-COMBINER-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
- ; GFX12-TO-COMBINER-FAKE16-NEXT: [[AMDGPU_LOAD_D16_LO_I8_:%[0-9]+]]:vgpr(<2 x s16>) = G_AMDGPU_LOAD_D16_LO_I8 [[COPY]](p1), [[COPY1]] :: (load (s8), addrspace 3)
- ; GFX12-TO-COMBINER-FAKE16-NEXT: $vgpr0 = COPY [[AMDGPU_LOAD_D16_LO_I8_]](<2 x s16>)
+ ; GFX12-TO-COMBINER-LABEL: name: test_sextload_local_s16_from_s8
+ ; GFX12-TO-COMBINER: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX12-TO-COMBINER-NEXT: {{ $}}
+ ; GFX12-TO-COMBINER-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+ ; GFX12-TO-COMBINER-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
+ ; GFX12-TO-COMBINER-NEXT: [[AMDGPU_LOAD_D16_LO_I8_:%[0-9]+]]:vgpr(<2 x s16>) = G_AMDGPU_LOAD_D16_LO_I8 [[COPY]](p1), [[COPY1]] :: (load (s8), addrspace 3)
+ ; GFX12-TO-COMBINER-NEXT: $vgpr0 = COPY [[AMDGPU_LOAD_D16_LO_I8_]](<2 x s16>)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<2 x s16>) = COPY $vgpr2
%2:_(s16) = G_SEXTLOAD %0 :: (load (s8), addrspace 3)
@@ -331,51 +271,21 @@ body: |
; GFX12-FAKE16-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; GFX12-FAKE16-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
;
- ; GFX11-TO-COMBINER-TRUE16-LABEL: name: test_sextload_private_s16_from_s8
- ; GFX11-TO-COMBINER-TRUE16: liveins: $vgpr0_vgpr1, $vgpr2
- ; GFX11-TO-COMBINER-TRUE16-NEXT: {{ $}}
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s16) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 5)
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[BITCAST:%[0-9]+]]:vgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>)
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[SEXTLOAD]](s16)
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -65536
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[BITCAST]], [[COPY2]]
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[AND]], [[ZEXT]]
- ; GFX11-TO-COMBINER-TRUE16-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; GFX11-TO-COMBINER-TRUE16-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ;
- ; GFX11-TO-COMBINER-FAKE16-LABEL: name: test_sextload_private_s16_from_s8
- ; GFX11-TO-COMBINER-FAKE16: liveins: $vgpr0_vgpr1, $vgpr2
- ; GFX11-TO-COMBINER-FAKE16-NEXT: {{ $}}
- ; GFX11-TO-COMBINER-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX11-TO-COMBINER-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
- ; GFX11-TO-COMBINER-FAKE16-NEXT: [[AMDGPU_LOAD_D16_LO_I8_:%[0-9]+]]:vgpr(<2 x s16>) = G_AMDGPU_LOAD_D16_LO_I8 [[COPY]](p1), [[COPY1]] :: (load (s8), addrspace 5)
- ; GFX11-TO-COMBINER-FAKE16-NEXT: $vgpr0 = COPY [[AMDGPU_LOAD_D16_LO_I8_]](<2 x s16>)
- ;
- ; GFX12-TO-COMBINER-TRUE16-LABEL: name: test_sextload_private_s16_from_s8
- ; GFX12-TO-COMBINER-TRUE16: liveins: $vgpr0_vgpr1, $vgpr2
- ; GFX12-TO-COMBINER-TRUE16-NEXT: {{ $}}
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s16) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 5)
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[BITCAST:%[0-9]+]]:vgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>)
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[SEXTLOAD]](s16)
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -65536
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[BITCAST]], [[COPY2]]
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[AND]], [[ZEXT]]
- ; GFX12-TO-COMBINER-TRUE16-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; GFX12-TO-COMBINER-TRUE16-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+ ; GFX11-TO-COMBINER-LABEL: name: test_sextload_private_s16_from_s8
+ ; GFX11-TO-COMBINER: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX11-TO-COMBINER-NEXT: {{ $}}
+ ; GFX11-TO-COMBINER-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+ ; GFX11-TO-COMBINER-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
+ ; GFX11-TO-COMBINER-NEXT: [[AMDGPU_LOAD_D16_LO_I8_:%[0-9]+]]:vgpr(<2 x s16>) = G_AMDGPU_LOAD_D16_LO_I8 [[COPY]](p1), [[COPY1]] :: (load (s8), addrspace 5)
+ ; GFX11-TO-COMBINER-NEXT: $vgpr0 = COPY [[AMDGPU_LOAD_D16_LO_I8_]](<2 x s16>)
;
- ; GFX12-TO-COMBINER-FAKE16-LABEL: name: test_sextload_private_s16_from_s8
- ; GFX12-TO-COMBINER-FAKE16: liveins: $vgpr0_vgpr1, $vgpr2
- ; GFX12-TO-COMBINER-FAKE16-NEXT: {{ $}}
- ; GFX12-TO-COMBINER-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX12-TO-COMBINER-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
- ; GFX12-TO-COMBINER-FAKE16-NEXT: [[AMDGPU_LOAD_D16_LO_I8_:%[0-9]+]]:vgpr(<2 x s16>) = G_AMDGPU_LOAD_D16_LO_I8 [[COPY]](p1), [[COPY1]] :: (load (s8), addrspace 5)
- ; GFX12-TO-COMBINER-FAKE16-NEXT: $vgpr0 = COPY [[AMDGPU_LOAD_D16_LO_I8_]](<2 x s16>)
+ ; GFX12-TO-COMBINER-LABEL: name: test_sextload_private_s16_from_s8
+ ; GFX12-TO-COMBINER: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX12-TO-COMBINER-NEXT: {{ $}}
+ ; GFX12-TO-COMBINER-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+ ; GFX12-TO-COMBINER-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
+ ; GFX12-TO-COMBINER-NEXT: [[AMDGPU_LOAD_D16_LO_I8_:%[0-9]+]]:vgpr(<2 x s16>) = G_AMDGPU_LOAD_D16_LO_I8 [[COPY]](p1), [[COPY1]] :: (load (s8), addrspace 5)
+ ; GFX12-TO-COMBINER-NEXT: $vgpr0 = COPY [[AMDGPU_LOAD_D16_LO_I8_]](<2 x s16>)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<2 x s16>) = COPY $vgpr2
%2:_(s16) = G_SEXTLOAD %0 :: (load (s8), addrspace 5)
|
🐧 Linux x64 Test Results
All executed tests passed, but another part of the build failed. Click on a failure below to see the details. bin/tco (Likely Already Failing): This test is already failing at the base commit. bin/fir-opt (Likely Already Failing): This test is already failing at the base commit. If these failures are unrelated to your changes (for example tests are broken or flaky at HEAD), please open an issue at https://github.com/llvm/llvm-project/issues and add the |
🪟 Windows x64 Test Results
✅ The build succeeded and all tests passed. |
petar-avramovic
left a comment
There was a problem hiding this comment.
Need to regenerate some failing .ll tests.
b0cac7e to
3534ffb
Compare
529ab5a to
0489434
Compare
3534ffb to
8b918ae
Compare
0489434 to
2756993
Compare
Stack created with GitHub Stacks CLI • Give Feedback 💬
See #195289 for previous discussion