Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 40 additions & 8 deletions kernel/arm/sum.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,23 +29,55 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* trivial copy of asum.c with the ABS() removed *
**************************************************************************************/


#include "common.h"
#include "../simd/intrin.h"
#include <math.h>

/*
 * Sum of the elements of a strided vector (no absolute value taken).
 *
 * n      number of elements to add up
 * x      pointer to the first element
 * inc_x  distance, in elements, between consecutive entries of x
 *
 * Returns x[0] + x[inc_x] + ... + x[(n - 1) * inc_x], or 0 when n <= 0 or
 * inc_x <= 0.
 */
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
{
    BLASLONG i = 0;
    FLOAT sumf = 0.0;

    if (n <= 0 || inc_x <= 0)
        return sumf;

    /* From here on n is the exclusive upper bound on the index into x. */
    n *= inc_x;

    if (inc_x == 1)
    {
/*
 * NOTE(review): the vector path below is written with f32 intrinsics only,
 * so it must not be used when FLOAT is not a 32-bit float.  This assumes
 * DOUBLE is the build macro that selects double precision -- confirm
 * against common.h.
 */
#if V_SIMD && !defined(DOUBLE)
        /*
         * Bounds are kept in BLASLONG so that large n is not truncated.
         * The masking below assumes v_nlanes_f32 is a power of two.
         */
        const BLASLONG vstep = v_nlanes_f32;
        const BLASLONG unrollx4 = n & -(vstep * 4);
        const BLASLONG unrollx = n & -vstep;
        v_f32 vsum0 = v_zero_f32();
        v_f32 vsum1 = v_zero_f32();
        v_f32 vsum2 = v_zero_f32();
        v_f32 vsum3 = v_zero_f32();

        /* Four independent accumulators; every load is offset by i. */
        for (; i < unrollx4; i += vstep * 4)
        {
            vsum0 = v_add_f32(vsum0, v_loadu_f32(x + i));
            vsum1 = v_add_f32(vsum1, v_loadu_f32(x + i + vstep));
            vsum2 = v_add_f32(vsum2, v_loadu_f32(x + i + vstep * 2));
            vsum3 = v_add_f32(vsum3, v_loadu_f32(x + i + vstep * 3));
        }
        vsum0 = v_add_f32(
            v_add_f32(vsum0, vsum1), v_add_f32(vsum2, vsum3));

        /* Remaining whole vectors, one at a time. */
        for (; i < unrollx; i += vstep)
        {
            vsum0 = v_add_f32(vsum0, v_loadu_f32(x + i));
        }
        sumf = v_sum_f32(vsum0);
#else
        /* Scalar fallback, four elements per iteration. */
        const BLASLONG n1 = n & -4;
        for (; i < n1; i += 4)
        {
            sumf += x[i] + x[i + 1] + x[i + 2] + x[i + 3];
        }
#endif
    }

    /* Tail of the unit-stride case, or the whole vector when inc_x != 1. */
    while (i < n)
    {
        sumf += x[i];
        i += inc_x;
    }
    return sumf;
}