Coverage Report

Created: 2026-01-17 13:09

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/contrib/openblas/kernel/arm/sum.c
Line
Count
Source
1
/***************************************************************************
2
Copyright (c) 2013, The OpenBLAS Project
3
All rights reserved.
4
Redistribution and use in source and binary forms, with or without
5
modification, are permitted provided that the following conditions are
6
met:
7
1. Redistributions of source code must retain the above copyright
8
notice, this list of conditions and the following disclaimer.
9
2. Redistributions in binary form must reproduce the above copyright
10
notice, this list of conditions and the following disclaimer in
11
the documentation and/or other materials provided with the
12
distribution.
13
3. Neither the name of the OpenBLAS project nor the names of
14
its contributors may be used to endorse or promote products
15
derived from this software without specific prior written permission.
16
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
20
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
25
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
*****************************************************************************/
27
28
/**************************************************************************************
29
* trivial copy of asum.c with the ABS() removed                                       *
30
**************************************************************************************/
31
32
#include "common.h"
33
#include "../simd/intrin.h"
34
#include <math.h>
35
36
/*
 * Plain (signed) sum of a strided vector.
 *
 * Adds up the n elements x[0], x[inc_x], x[2 * inc_x], ... and returns the
 * total. No absolute value is taken.
 *
 * Parameters:
 *   n      number of elements to add; a value <= 0 yields 0.0
 *   x      pointer to the first element
 *   inc_x  stride between consecutive elements; a value <= 0 yields 0.0
 *
 * Returns the accumulated sum as FLOAT.
 *
 * For unit stride the bulk of the vector is processed either with the
 * universal SIMD intrinsics (four independent accumulators, then a
 * single-vector loop) or, when SIMD is not enabled for this precision, with a
 * 4-way unrolled scalar loop. Whatever is left over, and every non-unit
 * stride, is handled by the scalar loop at the end.
 *
 * The unroll bounds are kept in BLASLONG, the same type as n and i: storing
 * them in an int would truncate for n > INT_MAX and push almost the whole
 * vector through the scalar tail loop.
 */
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
{
  BLASLONG i = 0;
  FLOAT sumf = 0.0;

  if (n <= 0 || inc_x <= 0)
    return (sumf);

  /* From here on n is the exclusive upper bound on the element index. */
  n *= inc_x;

  if (inc_x == 1)
  {
#if V_SIMD && (!defined(DOUBLE) || (defined(DOUBLE) && V_SIMD_F64 && V_SIMD > 128))
#ifdef DOUBLE
    const int vstep = v_nlanes_f64;
    /* n rounded down to a multiple of 4 vectors / 1 vector respectively. */
    const BLASLONG unrollx4 = n & (-vstep * 4);
    const BLASLONG unrollx = n & -vstep;
    v_f64 vsum0 = v_zero_f64();
    v_f64 vsum1 = v_zero_f64();
    v_f64 vsum2 = v_zero_f64();
    v_f64 vsum3 = v_zero_f64();
    for (; i < unrollx4; i += vstep * 4)
    {
      vsum0 = v_add_f64(vsum0, v_loadu_f64(x + i));
      vsum1 = v_add_f64(vsum1, v_loadu_f64(x + i + vstep));
      vsum2 = v_add_f64(vsum2, v_loadu_f64(x + i + vstep * 2));
      vsum3 = v_add_f64(vsum3, v_loadu_f64(x + i + vstep * 3));
    }
    /* Fold the four partial accumulators into vsum0. */
    vsum0 = v_add_f64(
      v_add_f64(vsum0, vsum1), v_add_f64(vsum2, vsum3));
    for (; i < unrollx; i += vstep)
    {
      vsum0 = v_add_f64(vsum0, v_loadu_f64(x + i));
    }
    /* Horizontal reduction of the vector accumulator. */
    sumf = v_sum_f64(vsum0);
#else
    const int vstep = v_nlanes_f32;
    /* n rounded down to a multiple of 4 vectors / 1 vector respectively. */
    const BLASLONG unrollx4 = n & (-vstep * 4);
    const BLASLONG unrollx = n & -vstep;
    v_f32 vsum0 = v_zero_f32();
    v_f32 vsum1 = v_zero_f32();
    v_f32 vsum2 = v_zero_f32();
    v_f32 vsum3 = v_zero_f32();
    for (; i < unrollx4; i += vstep * 4)
    {
      vsum0 = v_add_f32(vsum0, v_loadu_f32(x + i));
      vsum1 = v_add_f32(vsum1, v_loadu_f32(x + i + vstep));
      vsum2 = v_add_f32(vsum2, v_loadu_f32(x + i + vstep * 2));
      vsum3 = v_add_f32(vsum3, v_loadu_f32(x + i + vstep * 3));
    }
    /* Fold the four partial accumulators into vsum0. */
    vsum0 = v_add_f32(
      v_add_f32(vsum0, vsum1), v_add_f32(vsum2, vsum3));
    for (; i < unrollx; i += vstep)
    {
      vsum0 = v_add_f32(vsum0, v_loadu_f32(x + i));
    }
    /* Horizontal reduction of the vector accumulator. */
    sumf = v_sum_f32(vsum0);
#endif
#else
    /* Scalar fallback: n rounded down to a multiple of 4. */
    BLASLONG n1 = n & -4;
    for (; i < n1; i += 4)
    {
      sumf += x[i] + x[i + 1] + x[i + 2] + x[i + 3];
    }
#endif
  }

  /* Remainder of the unit-stride case, and the whole strided case. */
  while (i < n)
  {
    sumf += x[i];
    i += inc_x;
  }
  return (sumf);
}
Unexecuted instantiation: ssum_k_PRESCOTT
Unexecuted instantiation: dsum_k_PRESCOTT
Unexecuted instantiation: ssum_k_CORE2
Unexecuted instantiation: dsum_k_CORE2
Unexecuted instantiation: ssum_k_NEHALEM
Unexecuted instantiation: dsum_k_NEHALEM
Unexecuted instantiation: ssum_k_BARCELONA
Unexecuted instantiation: dsum_k_BARCELONA
Unexecuted instantiation: ssum_k_SANDYBRIDGE
Unexecuted instantiation: dsum_k_SANDYBRIDGE
Unexecuted instantiation: ssum_k_BULLDOZER
Unexecuted instantiation: dsum_k_BULLDOZER
Unexecuted instantiation: ssum_k_PILEDRIVER
Unexecuted instantiation: dsum_k_PILEDRIVER
Unexecuted instantiation: ssum_k_STEAMROLLER
Unexecuted instantiation: dsum_k_STEAMROLLER
Unexecuted instantiation: ssum_k_EXCAVATOR
Unexecuted instantiation: dsum_k_EXCAVATOR
Unexecuted instantiation: ssum_k_HASWELL
Unexecuted instantiation: dsum_k_HASWELL
Unexecuted instantiation: ssum_k_ZEN
Unexecuted instantiation: dsum_k_ZEN