-
Notifications
You must be signed in to change notification settings - Fork 349
Math: Optimise 16-bit matrix multiplication functions. #9088
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
4a3f6b9
1ac1b5d
1f4f10a
5ddb7c0
53dbdef
65c21f0
c3eeab1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,93 +3,138 @@ | |
| // Copyright(c) 2022 Intel Corporation. All rights reserved. | ||
| // | ||
| // Author: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com> | ||
| // Shriram Shastry <malladi.sastry@linux.intel.com> | ||
| // | ||
|
|
||
| #include <sof/math/matrix.h> | ||
| #include <errno.h> | ||
| #include <stdint.h> | ||
|
|
||
| /** | ||
| * Description: Performs matrix multiplication of two fixed-point 16-bit integer matrices, | ||
| * storing the result in a third matrix. It accounts for fractional bits for | ||
| * fixed-point arithmetic, adjusting the result accordingly. | ||
| * | ||
| * Arguments: | ||
| * a: pointer to the first input matrix | ||
| * b: pointer to the second input matrix | ||
| * c: pointer to the output matrix to store result | ||
| * | ||
| * Return: | ||
| * 0 on successful multiplication. | ||
| * -EINVAL if input dimensions do not allow for multiplication. | ||
| * -ERANGE if the shift operation might cause integer overflow. | ||
| */ | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Adding documentation is good, but using the doxygen syntax would be better.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
| int mat_multiply(struct mat_matrix_16b *a, struct mat_matrix_16b *b, struct mat_matrix_16b *c) | ||
| { | ||
| int64_t s; | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please keep the declarations in the beginning in function and not change the code style. All declarations in beginning show at first glance the usage amount of stack variables. The equations after this are also more readable without declarations. |
||
| int16_t *x; | ||
| int16_t *y; | ||
| int16_t *z = c->data; | ||
| int i, j, k; | ||
| int y_inc = b->columns; | ||
| const int shift_minus_one = a->fractions + b->fractions - c->fractions - 1; | ||
|
|
||
| /* Validate matrix dimensions are compatible for multiplication */ | ||
| if (a->columns != b->rows || a->rows != c->rows || b->columns != c->columns) | ||
| return -EINVAL; | ||
|
|
||
| /* If all data is Q0 */ | ||
| if (shift_minus_one == -1) { | ||
| int32_t acc; /* Accumulator for dot product calculation */ | ||
| int16_t *x, *y, *z = c->data; /* Pointers for matrices a, b, and c */ | ||
| int i, j, k; /* Loop counters */ | ||
| int y_inc = b->columns; /* Column increment for matrix b elements */ | ||
| /* Calculate shift amount for adjusting fractional bits in the result */ | ||
| const int shift = a->fractions + b->fractions - c->fractions - 1; | ||
cujomalainey marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| /* Check shift to ensure no integer overflow occurs during shifting */ | ||
| if (shift < -1 || shift > 31) | ||
| return -ERANGE; | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. your "cycles" figures before and after are very similar to the previous commit. This tells me that you probably measured a state before several changes (possibly the whole PR) and after them, not before and after each commit. So "cycles" don't seem correct. |
||
|
|
||
| /* Special case when shift is -1 (Q0 data) */ | ||
| if (shift == -1) { | ||
| /* Matrix multiplication loop */ | ||
| for (i = 0; i < a->rows; i++) { | ||
| for (j = 0; j < b->columns; j++) { | ||
| s = 0; | ||
| /* Initialize accumulator for each element */ | ||
| acc = 0; | ||
| /* Set x at the start of ith row of a */ | ||
| x = a->data + a->columns * i; | ||
| /* Set y at the top of jth column of b */ | ||
| y = b->data + j; | ||
| /* Dot product loop */ | ||
| for (k = 0; k < b->rows; k++) { | ||
| s += (int32_t)(*x) * (*y); | ||
| x++; | ||
| /* Multiply & accumulate */ | ||
| acc += (int32_t)(*x++) * (*y); | ||
| /* Move to next row in the current column of b */ | ||
| y += y_inc; | ||
| } | ||
| *z = (int16_t)s; /* For Q16.0 */ | ||
| z++; | ||
| /* Enhanced pointer arithmetic */ | ||
| *z = (int16_t)acc; | ||
| z++; /* Move to the next element in the output matrix */ | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you just changed |
||
| } | ||
| } | ||
|
|
||
| return 0; | ||
| } | ||
|
|
||
| for (i = 0; i < a->rows; i++) { | ||
| for (j = 0; j < b->columns; j++) { | ||
| s = 0; | ||
| x = a->data + a->columns * i; | ||
| y = b->data + j; | ||
| for (k = 0; k < b->rows; k++) { | ||
| s += (int32_t)(*x) * (*y); | ||
| x++; | ||
| y += y_inc; | ||
| } else { | ||
| /* General case for other shift values */ | ||
| for (i = 0; i < a->rows; i++) { | ||
| for (j = 0; j < b->columns; j++) { | ||
| /* Initialize accumulator for each element */ | ||
| acc = 0; | ||
| /* Set x at the start of ith row of a */ | ||
| x = a->data + a->columns * i; | ||
| /* Set y at the top of jth column of b */ | ||
| y = b->data + j; | ||
| /* Dot product loop */ | ||
| for (k = 0; k < b->rows; k++) { | ||
| /* Multiply & accumulate Enhanced pointer arithmetic */ | ||
| acc += (int32_t)(*x++) * (*y); | ||
| /* Move to next row in the current column of b */ | ||
| y += y_inc; | ||
| } | ||
| /* Enhanced pointer arithmetic */ | ||
| *z = (int16_t)(((acc >> shift) + 1) >> 1); /*Shift to Qx.y */ | ||
| z++; /* Move to the next element in the output matrix */ | ||
| } | ||
| *z = (int16_t)(((s >> shift_minus_one) + 1) >> 1); /*Shift to Qx.y */ | ||
| z++; | ||
| } | ||
| } | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Lots of not really helpful comments, very confusing whether this changes anything. With this commit I don't think I can approve the PR |
||
| return 0; | ||
| } | ||
|
|
||
| /** | ||
| * Description: Performs element-wise multiplication of two matrices with 16-bit integer elements | ||
| * and stores the result in a third matrix. Checks that all matrices have the same | ||
| * dimensions and adjusts for fractional bits appropriately. This operation handles | ||
| * the manipulation of fixed-point precision based on the fractional bits present in | ||
| * the matrices. | ||
| * | ||
| * Arguments: | ||
| * a - pointer to the first input matrix | ||
| * b - pointer to the second input matrix | ||
| * c - pointer to the output matrix where the result will be stored | ||
| * | ||
| * Returns: | ||
| * 0 on successful multiplication, | ||
| * -EINVAL if input pointers are NULL or matrix dimensions do not match. | ||
| */ | ||
| int mat_multiply_elementwise(struct mat_matrix_16b *a, struct mat_matrix_16b *b, | ||
| struct mat_matrix_16b *c) | ||
| { int64_t p; | ||
| { | ||
| /* Validate matrix dimensions and non-null pointers */ | ||
| if (!a || !b || !c || | ||
| a->columns != b->columns || a->rows != b->rows || | ||
| c->columns != a->columns || c->rows != a->rows) { | ||
| return -EINVAL; | ||
| } | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you removed this in your previous commit |
||
|
|
||
| int16_t *x = a->data; | ||
| int16_t *y = b->data; | ||
| int16_t *z = c->data; | ||
| int32_t p; | ||
| int i; | ||
| const int shift_minus_one = a->fractions + b->fractions - c->fractions - 1; | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same in this commit - please check that the commit description still matches the contents and I'd like to be able to see that too. As it stands, it's difficult to see where "unnecessary conditionals" are eliminated |
||
|
|
||
| if (a->columns != b->columns || b->columns != c->columns || | ||
| a->rows != b->rows || b->rows != c->rows) { | ||
| return -EINVAL; | ||
| } | ||
|
|
||
| /* If all data is Q0 */ | ||
| if (shift_minus_one == -1) { | ||
| for (i = 0; i < a->rows * a->columns; i++) { | ||
| for (i = 0; i < a->rows * a->columns; i++, x++, y++, z++) | ||
| *z = *x * *y; | ||
| x++; | ||
| y++; | ||
| z++; | ||
| } | ||
|
|
||
| return 0; | ||
| } | ||
|
|
||
| for (i = 0; i < a->rows * a->columns; i++) { | ||
| for (i = 0; i < a->rows * a->columns; i++, x++, y++, z++) { | ||
| p = (int32_t)(*x) * *y; | ||
| *z = (int16_t)(((p >> shift_minus_one) + 1) >> 1); /*Shift to Qx.y */ | ||
| x++; | ||
| y++; | ||
| z++; | ||
| *z = (int16_t)(((p >> shift_minus_one) + 1) >> 1); /* Shift to Qx.y */ | ||
| } | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think this enhances or streamlines anything. NAK. |
||
|
|
||
| return 0; | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.