@@ -144,9 +144,9 @@ define void @gather_zero_stride(ptr noalias nocapture %A, ptr noalias nocapture
144144; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, ma
145145; CHECK-NEXT: .LBB3_1: # %vector.body
146146; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
147- ; CHECK-NEXT: lbu a3, 0 (a1)
148- ; CHECK-NEXT: vle8.v v8 , (a0)
149- ; CHECK-NEXT: vadd.vx v8, v8, a3
147+ ; CHECK-NEXT: vlse8.v v8, (a1), zero
148+ ; CHECK-NEXT: vle8.v v9 , (a0)
149+ ; CHECK-NEXT: vadd.vv v8, v9, v8
150150; CHECK-NEXT: vse8.v v8, (a0)
151151; CHECK-NEXT: addi a0, a0, 32
152152; CHECK-NEXT: addi a1, a1, 160
@@ -182,9 +182,9 @@ define void @gather_zero_stride_i32(ptr noalias nocapture %A, ptr noalias nocapt
182182; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma
183183; CHECK-NEXT: .LBB4_1: # %vector.body
184184; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
185- ; CHECK-NEXT: lw a3, 0 (a1)
186- ; CHECK-NEXT: vle32.v v8 , (a0)
187- ; CHECK-NEXT: vadd.vx v8, v8, a3
185+ ; CHECK-NEXT: vlse32.v v8, (a1), zero
186+ ; CHECK-NEXT: vle32.v v9 , (a0)
187+ ; CHECK-NEXT: vadd.vv v8, v9, v8
188188; CHECK-NEXT: vse32.v v8, (a0)
189189; CHECK-NEXT: addi a0, a0, 8
190190; CHECK-NEXT: addi a1, a1, 160
@@ -214,57 +214,22 @@ for.cond.cleanup: ; preds = %vector.body
214214}
215215
216216define void @gather_zero_stride_unfold (ptr noalias nocapture %A , ptr noalias nocapture readonly %B ) {
217- ; V-LABEL: gather_zero_stride_unfold:
218- ; V: # %bb.0: # %entry
219- ; V-NEXT: addi a2, a0, 1024
220- ; V-NEXT: li a3, 32
221- ; V-NEXT: vsetvli zero, a3, e8, m1, ta, ma
222- ; V-NEXT: .LBB5_1: # %vector.body
223- ; V-NEXT: # =>This Inner Loop Header: Depth=1
224- ; V-NEXT: vlse8.v v8, (a1), zero
225- ; V-NEXT: vle8.v v9, (a0)
226- ; V-NEXT: vdivu.vv v8, v8, v9
227- ; V-NEXT: vse8.v v8, (a0)
228- ; V-NEXT: addi a0, a0, 32
229- ; V-NEXT: addi a1, a1, 160
230- ; V-NEXT: bne a0, a2, .LBB5_1
231- ; V-NEXT: # %bb.2: # %for.cond.cleanup
232- ; V-NEXT: ret
233- ;
234- ; ZVE32F-LABEL: gather_zero_stride_unfold:
235- ; ZVE32F: # %bb.0: # %entry
236- ; ZVE32F-NEXT: addi a2, a0, 1024
237- ; ZVE32F-NEXT: li a3, 32
238- ; ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma
239- ; ZVE32F-NEXT: .LBB5_1: # %vector.body
240- ; ZVE32F-NEXT: # =>This Inner Loop Header: Depth=1
241- ; ZVE32F-NEXT: vlse8.v v8, (a1), zero
242- ; ZVE32F-NEXT: vle8.v v9, (a0)
243- ; ZVE32F-NEXT: vdivu.vv v8, v8, v9
244- ; ZVE32F-NEXT: vse8.v v8, (a0)
245- ; ZVE32F-NEXT: addi a0, a0, 32
246- ; ZVE32F-NEXT: addi a1, a1, 160
247- ; ZVE32F-NEXT: bne a0, a2, .LBB5_1
248- ; ZVE32F-NEXT: # %bb.2: # %for.cond.cleanup
249- ; ZVE32F-NEXT: ret
250- ;
251- ; NOT-OPTIMIZED-LABEL: gather_zero_stride_unfold:
252- ; NOT-OPTIMIZED: # %bb.0: # %entry
253- ; NOT-OPTIMIZED-NEXT: addi a2, a0, 1024
254- ; NOT-OPTIMIZED-NEXT: li a3, 32
255- ; NOT-OPTIMIZED-NEXT: vsetvli zero, a3, e8, m1, ta, ma
256- ; NOT-OPTIMIZED-NEXT: .LBB5_1: # %vector.body
257- ; NOT-OPTIMIZED-NEXT: # =>This Inner Loop Header: Depth=1
258- ; NOT-OPTIMIZED-NEXT: lbu a3, 0(a1)
259- ; NOT-OPTIMIZED-NEXT: vle8.v v8, (a0)
260- ; NOT-OPTIMIZED-NEXT: vmv.v.x v9, a3
261- ; NOT-OPTIMIZED-NEXT: vdivu.vv v8, v9, v8
262- ; NOT-OPTIMIZED-NEXT: vse8.v v8, (a0)
263- ; NOT-OPTIMIZED-NEXT: addi a0, a0, 32
264- ; NOT-OPTIMIZED-NEXT: addi a1, a1, 160
265- ; NOT-OPTIMIZED-NEXT: bne a0, a2, .LBB5_1
266- ; NOT-OPTIMIZED-NEXT: # %bb.2: # %for.cond.cleanup
267- ; NOT-OPTIMIZED-NEXT: ret
217+ ; CHECK-LABEL: gather_zero_stride_unfold:
218+ ; CHECK: # %bb.0: # %entry
219+ ; CHECK-NEXT: addi a2, a0, 1024
220+ ; CHECK-NEXT: li a3, 32
221+ ; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, ma
222+ ; CHECK-NEXT: .LBB5_1: # %vector.body
223+ ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
224+ ; CHECK-NEXT: vlse8.v v8, (a1), zero
225+ ; CHECK-NEXT: vle8.v v9, (a0)
226+ ; CHECK-NEXT: vdivu.vv v8, v8, v9
227+ ; CHECK-NEXT: vse8.v v8, (a0)
228+ ; CHECK-NEXT: addi a0, a0, 32
229+ ; CHECK-NEXT: addi a1, a1, 160
230+ ; CHECK-NEXT: bne a0, a2, .LBB5_1
231+ ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
232+ ; CHECK-NEXT: ret
268233entry:
269234 br label %vector.body
270235
@@ -962,9 +927,9 @@ define void @gather_zero_stride_fp(ptr noalias nocapture %A, ptr noalias nocaptu
962927; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma
963928; CHECK-NEXT: .LBB16_1: # %vector.body
964929; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
965- ; CHECK-NEXT: flw fa5, 0 (a1)
966- ; CHECK-NEXT: vle32.v v8 , (a0)
967- ; CHECK-NEXT: vfadd.vf v8, v8, fa5
930+ ; CHECK-NEXT: vlse32.v v8, (a1), zero
931+ ; CHECK-NEXT: vle32.v v9 , (a0)
932+ ; CHECK-NEXT: vfadd.vv v8, v9, v8
968933; CHECK-NEXT: vse32.v v8, (a0)
969934; CHECK-NEXT: addi a0, a0, 128
970935; CHECK-NEXT: addi a1, a1, 640
@@ -992,3 +957,5 @@ vector.body: ; preds = %vector.body, %entry
992957for.cond.cleanup: ; preds = %vector.body
993958 ret void
994959}
960+ ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
961+ ; NOT-OPTIMIZED: {{.*}}
0 commit comments