Skip to content

Commit e199ef2

Browse files
authored
Merge 93e04a5 into 1531781
2 parents 1531781 + 93e04a5 commit e199ef2

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

64 files changed

+3301
-3825
lines changed

aie_kernels/aie2p/softmax.cc

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,4 +177,12 @@ void partial_softmax_bf16(bfloat16 *restrict input,
177177
partial_softmax_alias_bf16(input, output, scale_buffer, input_size, row_idx, num_rows, scale);
178178
}
179179

180+
/// Overwrites the tail of `inout` with negative infinity.
///
/// Every element at an index in [unmasked_size, total_size) is set to -inf;
/// elements before `unmasked_size` are left untouched. Nothing is written
/// when `unmasked_size >= total_size`.
void mask_bf16(bfloat16 *inout, const int32 unmasked_size, const int32 total_size)
{
    // TODO: Optimize this to use vector code
    // The fill value never changes, so convert it to bfloat16 once up front.
    const bfloat16 masked_value = static_cast<bfloat16>(-INFINITY);
    int32 idx = unmasked_size;
    while (idx < total_size) {
        inout[idx] = masked_value;
        ++idx;
    }
}
187+
180188
} // extern "C"

aie_kernels/generic/mv.cc

Lines changed: 11 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@
1515

1616
#include <aie_api/aie.hpp>
1717

18+
#ifndef VEC_SIZE
19+
#define VEC_SIZE 64
20+
#endif
21+
1822
void matvec_scalar(uint32_t m,
1923
uint32_t k,
2024
const bfloat16 *__restrict a,
@@ -40,22 +44,17 @@ Matrix-vector multiplication kernel
4044
- c: Pointer to the output vector
4145
- r: Vector size; data from the matrix and vector will be loaded in and processed in chunks of this size
4246
*/
43-
template <uint32_t r>
44-
void matvec_vectorized(uint32_t m,
45-
uint32_t k,
46-
const bfloat16 *__restrict a,
47-
const bfloat16 *__restrict b,
48-
bfloat16 *__restrict c)
47+
template <uint32_t r, uint32_t k>
48+
void matvec_vectorized(uint32_t m, const bfloat16 *__restrict a, const bfloat16 *__restrict b, bfloat16 *__restrict c)
4949
{
5050
::aie::set_rounding(aie::rounding_mode::conv_even);
5151
bfloat16 *c_end = c + m;
5252
const bfloat16 *b_end = b + k;
5353
for (; c < c_end; c++) {
5454
aie::accum acc = aie::zeros<accfloat, r>();
55-
// The following two pragmas enable pipelining the zero-overhead loop, but they do assume that k is at least
56-
// two. This assumption should hold for any useful use of this function; if k were one, this would be a simple
57-
// scalar multiplication of a vector.
58-
AIE_LOOP_MIN_ITERATION_COUNT(2)
55+
// The following two pragmas enable pipelining the zero-overhead loop, but they do assume that there are at
56+
// least two iterations of the loop, i.e. k >= 2*r. This pragma will break the code if that is not the case!
57+
AIE_LOOP_MIN_ITERATION_COUNT(k / VEC_SIZE)
5958
for (const bfloat16 *__restrict b_cur = b; b_cur < b_end; b_cur += r, a += r) {
6059
aie::vector<bfloat16, r> a_vec = aie::load_v<r>(a);
6160
aie::vector<bfloat16, r> b_vec = aie::load_v<r>(b_cur);
@@ -72,25 +71,23 @@ extern "C" {
7271
* `c`. */
7372

7473
void matvec_scalar_bf16_bf16(uint32_t m,
75-
uint32_t k,
7674
uint32_t row_offset,
7775
const bfloat16 *__restrict a_in,
7876
const bfloat16 *__restrict b_in,
7977
bfloat16 *__restrict c_out)
8078
{
8179
c_out += row_offset;
82-
matvec_scalar(m, k, a_in, b_in, c_out);
80+
matvec_scalar(m, DIM_K, a_in, b_in, c_out);
8381
}
8482

8583
void matvec_vectorized_bf16_bf16(uint32_t m,
86-
uint32_t k,
8784
uint32_t row_offset,
8885
const bfloat16 *__restrict a_in,
8986
const bfloat16 *__restrict b_in,
9087
bfloat16 *__restrict c_out)
9188
{
9289
c_out += row_offset;
93-
matvec_vectorized<64>(m, k, a_in, b_in, c_out);
90+
matvec_vectorized<VEC_SIZE, DIM_K>(m, a_in, b_in, c_out);
9491
}
9592

9693
} // extern "C"

conftest.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,9 @@
1616
@pytest.fixture
1717
def aie_context():
1818
"""Create a fresh AIEContext for each test"""
19-
return AIEContext()
19+
ctx = AIEContext()
20+
yield ctx
21+
ctx.device_manager.reset()
2022

2123

2224
def pytest_addoption(parser):

iron/common/__init__.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,16 @@
33

44
"""Common utilities and base classes for IRON operators."""
55

6-
from .aie_base import AIEOperatorBase, AIEOperatorConstraintError
7-
from .aie_context import AIEContext
6+
from .base import (
7+
AIEOperatorBase,
8+
MLIROperator,
9+
CompositeOperator,
10+
CompositeCallable,
11+
AIEBuffer,
12+
SingleXclbinCallable,
13+
AIERuntimeArgSpec,
14+
)
15+
from .context import AIEContext
816
from .compilation import (
917
XclbinArtifact,
1018
InstsBinArtifact,
@@ -13,4 +21,4 @@
1321
SourceArtifact,
1422
PythonGeneratedMLIRArtifact,
1523
)
16-
from .aie_device_manager import AIEDeviceManager
24+
from .device_manager import AIEDeviceManager

iron/common/aie_base.py

Lines changed: 0 additions & 229 deletions
This file was deleted.

0 commit comments

Comments
 (0)