[ 9%] Building CUDA object ggml/src/ggml-cuda/CMakeFiles/ggml-cuda.dir/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_1.cu.o
/devel/tools/llama.cpp/ggml/src/ggml-cuda/convert.cu(34): error: more than one operator "=" matches these operands:
function "__nv_bfloat16::operator=(float)" (declared at line 305 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(short)" (declared at line 530 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned short)" (declared at line 534 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(int)" (declared at line 538 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned int)" (declared at line 542 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(long long)" (declared at line 546 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned long long)" (declared at line 550 of /usr/include/cuda_bf16.hpp)
operand types are: nv_bfloat16 = __half
y[iy0 + 0] = v.x;
^
detected during:
instantiation of "void dequantize_block<qk,qr,dequantize_kernel,dst_t>(const void *, dst_t *, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t) [with qk=32, qr=2, dequantize_kernel=dequantize_q4_0, dst_t=nv_bfloat16]" at line 474
instantiation of "void dequantize_block_cuda<qk,qr,dequantize_kernel,dst_t>(const void *, dst_t *, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, cudaStream_t) [with qk=32, qr=2, dequantize_kernel=dequantize_q4_0, dst_t=nv_bfloat16]" at line 764
/devel/tools/llama.cpp/ggml/src/ggml-cuda/convert.cu(35): error: more than one operator "=" matches these operands:
function "__nv_bfloat16::operator=(float)" (declared at line 305 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(short)" (declared at line 530 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned short)" (declared at line 534 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(int)" (declared at line 538 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned int)" (declared at line 542 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(long long)" (declared at line 546 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned long long)" (declared at line 550 of /usr/include/cuda_bf16.hpp)
operand types are: nv_bfloat16 = __half
y[iy0 + y_offset] = v.y;
^
detected during:
instantiation of "void dequantize_block<qk,qr,dequantize_kernel,dst_t>(const void *, dst_t *, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t) [with qk=32, qr=2, dequantize_kernel=dequantize_q4_0, dst_t=nv_bfloat16]" at line 474
instantiation of "void dequantize_block_cuda<qk,qr,dequantize_kernel,dst_t>(const void *, dst_t *, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, cudaStream_t) [with qk=32, qr=2, dequantize_kernel=dequantize_q4_0, dst_t=nv_bfloat16]" at line 764
/devel/tools/llama.cpp/ggml/src/ggml-cuda/convert.cu(34): error: more than one operator "=" matches these operands:
function "__nv_bfloat16::operator=(float)" (declared at line 305 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(short)" (declared at line 530 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned short)" (declared at line 534 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(int)" (declared at line 538 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned int)" (declared at line 542 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(long long)" (declared at line 546 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned long long)" (declared at line 550 of /usr/include/cuda_bf16.hpp)
operand types are: nv_bfloat16 = __half
y[iy0 + 0] = v.x;
^
detected during:
instantiation of "void dequantize_block<qk,qr,dequantize_kernel,dst_t>(const void *, dst_t *, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t) [with qk=32, qr=2, dequantize_kernel=dequantize_q4_1, dst_t=nv_bfloat16]" at line 474
instantiation of "void dequantize_block_cuda<qk,qr,dequantize_kernel,dst_t>(const void *, dst_t *, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, cudaStream_t) [with qk=32, qr=2, dequantize_kernel=dequantize_q4_1, dst_t=nv_bfloat16]" at line 766
/devel/tools/llama.cpp/ggml/src/ggml-cuda/convert.cu(35): error: more than one operator "=" matches these operands:
function "__nv_bfloat16::operator=(float)" (declared at line 305 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(short)" (declared at line 530 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned short)" (declared at line 534 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(int)" (declared at line 538 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned int)" (declared at line 542 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(long long)" (declared at line 546 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned long long)" (declared at line 550 of /usr/include/cuda_bf16.hpp)
operand types are: nv_bfloat16 = __half
y[iy0 + y_offset] = v.y;
^
detected during:
instantiation of "void dequantize_block<qk,qr,dequantize_kernel,dst_t>(const void *, dst_t *, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t) [with qk=32, qr=2, dequantize_kernel=dequantize_q4_1, dst_t=nv_bfloat16]" at line 474
instantiation of "void dequantize_block_cuda<qk,qr,dequantize_kernel,dst_t>(const void *, dst_t *, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, cudaStream_t) [with qk=32, qr=2, dequantize_kernel=dequantize_q4_1, dst_t=nv_bfloat16]" at line 766
/devel/tools/llama.cpp/ggml/src/ggml-cuda/convert.cu(34): error: more than one operator "=" matches these operands:
function "__nv_bfloat16::operator=(float)" (declared at line 305 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(short)" (declared at line 530 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned short)" (declared at line 534 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(int)" (declared at line 538 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned int)" (declared at line 542 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(long long)" (declared at line 546 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned long long)" (declared at line 550 of /usr/include/cuda_bf16.hpp)
operand types are: nv_bfloat16 = __half
y[iy0 + 0] = v.x;
^
detected during:
instantiation of "void dequantize_block<qk,qr,dequantize_kernel,dst_t>(const void *, dst_t *, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t) [with qk=32, qr=2, dequantize_kernel=dequantize_q5_0, dst_t=nv_bfloat16]" at line 474
instantiation of "void dequantize_block_cuda<qk,qr,dequantize_kernel,dst_t>(const void *, dst_t *, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, cudaStream_t) [with qk=32, qr=2, dequantize_kernel=dequantize_q5_0, dst_t=nv_bfloat16]" at line 768
/devel/tools/llama.cpp/ggml/src/ggml-cuda/convert.cu(35): error: more than one operator "=" matches these operands:
function "__nv_bfloat16::operator=(float)" (declared at line 305 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(short)" (declared at line 530 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned short)" (declared at line 534 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(int)" (declared at line 538 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned int)" (declared at line 542 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(long long)" (declared at line 546 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned long long)" (declared at line 550 of /usr/include/cuda_bf16.hpp)
operand types are: nv_bfloat16 = __half
y[iy0 + y_offset] = v.y;
^
detected during:
instantiation of "void dequantize_block<qk,qr,dequantize_kernel,dst_t>(const void *, dst_t *, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t) [with qk=32, qr=2, dequantize_kernel=dequantize_q5_0, dst_t=nv_bfloat16]" at line 474
instantiation of "void dequantize_block_cuda<qk,qr,dequantize_kernel,dst_t>(const void *, dst_t *, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, cudaStream_t) [with qk=32, qr=2, dequantize_kernel=dequantize_q5_0, dst_t=nv_bfloat16]" at line 768
/devel/tools/llama.cpp/ggml/src/ggml-cuda/convert.cu(34): error: more than one operator "=" matches these operands:
function "__nv_bfloat16::operator=(float)" (declared at line 305 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(short)" (declared at line 530 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned short)" (declared at line 534 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(int)" (declared at line 538 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned int)" (declared at line 542 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(long long)" (declared at line 546 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned long long)" (declared at line 550 of /usr/include/cuda_bf16.hpp)
operand types are: nv_bfloat16 = __half
y[iy0 + 0] = v.x;
^
detected during:
instantiation of "void dequantize_block<qk,qr,dequantize_kernel,dst_t>(const void *, dst_t *, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t) [with qk=32, qr=2, dequantize_kernel=dequantize_q5_1, dst_t=nv_bfloat16]" at line 474
instantiation of "void dequantize_block_cuda<qk,qr,dequantize_kernel,dst_t>(const void *, dst_t *, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, cudaStream_t) [with qk=32, qr=2, dequantize_kernel=dequantize_q5_1, dst_t=nv_bfloat16]" at line 770
/devel/tools/llama.cpp/ggml/src/ggml-cuda/convert.cu(35): error: more than one operator "=" matches these operands:
function "__nv_bfloat16::operator=(float)" (declared at line 305 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(short)" (declared at line 530 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned short)" (declared at line 534 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(int)" (declared at line 538 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned int)" (declared at line 542 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(long long)" (declared at line 546 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned long long)" (declared at line 550 of /usr/include/cuda_bf16.hpp)
operand types are: nv_bfloat16 = __half
y[iy0 + y_offset] = v.y;
^
detected during:
instantiation of "void dequantize_block<qk,qr,dequantize_kernel,dst_t>(const void *, dst_t *, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t) [with qk=32, qr=2, dequantize_kernel=dequantize_q5_1, dst_t=nv_bfloat16]" at line 474
instantiation of "void dequantize_block_cuda<qk,qr,dequantize_kernel,dst_t>(const void *, dst_t *, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, cudaStream_t) [with qk=32, qr=2, dequantize_kernel=dequantize_q5_1, dst_t=nv_bfloat16]" at line 770
/devel/tools/llama.cpp/ggml/src/ggml-cuda/convert.cu(34): error: more than one operator "=" matches these operands:
function "__nv_bfloat16::operator=(float)" (declared at line 305 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(short)" (declared at line 530 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned short)" (declared at line 534 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(int)" (declared at line 538 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned int)" (declared at line 542 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(long long)" (declared at line 546 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned long long)" (declared at line 550 of /usr/include/cuda_bf16.hpp)
operand types are: nv_bfloat16 = __half
y[iy0 + 0] = v.x;
^
detected during:
instantiation of "void dequantize_block<qk,qr,dequantize_kernel,dst_t>(const void *, dst_t *, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t) [with qk=32, qr=1, dequantize_kernel=dequantize_q8_0, dst_t=nv_bfloat16]" at line 474
instantiation of "void dequantize_block_cuda<qk,qr,dequantize_kernel,dst_t>(const void *, dst_t *, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, cudaStream_t) [with qk=32, qr=1, dequantize_kernel=dequantize_q8_0, dst_t=nv_bfloat16]" at line 772
/devel/tools/llama.cpp/ggml/src/ggml-cuda/convert.cu(35): error: more than one operator "=" matches these operands:
function "__nv_bfloat16::operator=(float)" (declared at line 305 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(short)" (declared at line 530 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned short)" (declared at line 534 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(int)" (declared at line 538 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned int)" (declared at line 542 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(long long)" (declared at line 546 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned long long)" (declared at line 550 of /usr/include/cuda_bf16.hpp)
operand types are: nv_bfloat16 = __half
y[iy0 + y_offset] = v.y;
^
detected during:
instantiation of "void dequantize_block<qk,qr,dequantize_kernel,dst_t>(const void *, dst_t *, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t) [with qk=32, qr=1, dequantize_kernel=dequantize_q8_0, dst_t=nv_bfloat16]" at line 474
instantiation of "void dequantize_block_cuda<qk,qr,dequantize_kernel,dst_t>(const void *, dst_t *, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, cudaStream_t) [with qk=32, qr=1, dequantize_kernel=dequantize_q8_0, dst_t=nv_bfloat16]" at line 772
Git commit
221c0e0
Operating systems
Linux
GGML backends
CUDA
Problem description & steps to reproduce
My daily build of llama.cpp from the latest source failed while compiling
convert.cu. I can confirm that reverting to 18f3b5f fixes it.
First Bad Commit
07a19e2
Compile command
Relevant log output