Zig Version
0.14.0-dev.2097+d30e28754
Steps to Reproduce and Observed Behavior
This code: Godbolt link
// Packed struct with three bool fields; this is the type both reproduction
// functions below write through a pointer.
// The `store i3` in the LLVM IR of @init2 further down shows it is stored as a 3-bit integer.
const Pack = packed struct
{
field1: bool,
field2: bool,
field3: bool,
};
// Variant 1: initialize every field of `pack` with a separate assignment.
// This is the form that produces the load / and / or / store sequence in the
// assembly below instead of a single byte store.
export fn init(pack: *Pack) void {
pack.field1 = true;
pack.field2 = false;
pack.field3 = true;
}
// Variant 2: initialize `pack` by assigning a whole struct literal at once.
// Sets the same field values as `init`, but compiles to a single
// `mov byte ptr [rdi], 5` in the assembly below.
export fn init2(pack: *Pack) void {
pack.* = .{
.field1 = true,
.field2 = false,
.field3 = true,
};
}
Results in this assembly for x86-64:
# Field-by-field variant: read-modify-write of the byte at [rdi].
init:
movzx eax, byte ptr [rdi]
# -8 is 0xF8: clear the low three bits, keep the upper five bits of the loaded byte.
and al, -8
# 5 is 0b101: set bit 0 and bit 2.
or al, 5
mov byte ptr [rdi], al
ret
# Whole-struct variant: a single byte store of 5 (0b101), with no prior load.
init2:
mov byte ptr [rdi], 5
ret
Running the command `zig build-obj ./src/llvm_code.zig -O ReleaseFast -target x86_64-linux -mcpu znver4 --verbose-llvm-ir -fstrip >llvm_code.ll 2>&1` sheds a little light on the situation:
; Unoptimized IR for the field-by-field variant.
; Each of the three field assignments is emitted as its own
; i8 load / clear-bit / or-in-value / i8 store sequence through the pointer %0.
define dso_local void @init(ptr align 1 nonnull %0) #0 {
1:
%2 = alloca [8 x i8], align 8
%3 = alloca [8 x i8], align 8
%4 = alloca [8 x i8], align 8
; First assignment (pack.field1 = true): mask out bit 0, then or in (true << 0).
store ptr %0, ptr %4, align 8
%5 = load ptr, ptr %4, align 8
%6 = load i8, ptr %5, align 1
%7 = zext i1 true to i8
%8 = shl i8 %7, 0
%9 = xor i8 %8, -1
%10 = and i8 %6, %9
%11 = zext i1 true to i8
%12 = shl i8 %11, 0
%13 = or i8 %12, %10
store i8 %13, ptr %5, align 1
; Second assignment (pack.field2 = false): mask out bit 1, then or in (false << 1).
store ptr %0, ptr %3, align 8
%14 = load ptr, ptr %3, align 8
%15 = load i8, ptr %14, align 1
%16 = zext i1 true to i8
%17 = shl i8 %16, 1
%18 = xor i8 %17, -1
%19 = and i8 %15, %18
%20 = zext i1 false to i8
%21 = shl i8 %20, 1
%22 = or i8 %21, %19
store i8 %22, ptr %14, align 1
; Third assignment (pack.field3 = true): mask out bit 2, then or in (true << 2).
store ptr %0, ptr %2, align 8
%23 = load ptr, ptr %2, align 8
%24 = load i8, ptr %23, align 1
%25 = zext i1 true to i8
%26 = shl i8 %25, 2
%27 = xor i8 %26, -1
%28 = and i8 %24, %27
%29 = zext i1 true to i8
%30 = shl i8 %29, 2
%31 = or i8 %30, %28
store i8 %31, ptr %23, align 1
ret void
}
; Unoptimized IR for the whole-struct variant: one store of the i3 value -3
; (bit pattern 0b101) through %0, with no preceding load.
; Function Attrs: nounwind uwtable nosanitize_coverage skipprofile
define dso_local void @init2(ptr align 1 nonnull %0) #0 {
1:
store i3 -3, ptr %0, align 1
ret void
}
This Godbolt link shows that if we change the `i8`s to `i3`s in the LLVM IR of `@init`, we get the optimization we want out of LLVM. I have not verified that this change is fully correct, but this issue can potentially be solved by emitting operations on `i3` rather than `i8`.
Expected Behavior
Both functions should compile down to mov byte ptr [rdi], 5
Zig Version
0.14.0-dev.2097+d30e28754
Steps to Reproduce and Observed Behavior
This code: Godbolt link
Results in this assembly for x86-64:
Running this command:
zig build-obj ./src/llvm_code.zig -O ReleaseFast -target x86_64-linux -mcpu znver4 --verbose-llvm-ir -fstrip >llvm_code.ll 2>&1
sheds a little light on the situation:
This Godbolt link shows that if we change the i8's to i3's in the LLVM IR of @init, we can get the optimization we want out of LLVM. I did not verify that this makes 100% sense to do, but potentially this issue can be solved by using operations on i3's rather than i8's.
Expected Behavior
Both functions should compile down to
mov byte ptr [rdi], 5