Question
When I load the Qwen2.5 model, the following error occurs once the input sequences get longer. I had already set model.cfg.n_ctx to 32768 beforehand, so how can I solve this?
import torch
from tqdm import tqdm
from transformers import AutoTokenizer
from transformer_lens import HookedTransformer

tokenizer = AutoTokenizer.from_pretrained(
    "Qwen/Qwen2.5-0.5B-Instruct",
    token=TOKEN,  # tokenizers are device-agnostic, so no device_map here
)
model = HookedTransformer.from_pretrained_no_processing(
    "Qwen/Qwen2.5-0.5B-Instruct",
    dtype=torch.bfloat16,
    default_padding_side='left',
    device_map='auto',
    use_auth_token=TOKEN,
)
model.cfg.n_ctx = 32768
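One thing to be aware of here: HookedTransformer precomputes its rotary sin/cos tables at construction time, sized by the cfg.n_ctx the model was built with, so overwriting model.cfg.n_ctx afterwards does not, as far as I can tell, regrow those buffers. Positions past the original context length then index beyond the table, which is consistent with the index-out-of-bounds assert inside apply_rotary below. A minimal guard sketch, assuming rotary_sin is the precomputed buffer on each attention block (worth verifying on your transformer_lens version):

# Sketch: clamp inputs to the context length the rotary tables were
# actually built with (the construction-time value, not the 32768
# written into cfg afterwards).
rotary_len = model.blocks[0].attn.rotary_sin.shape[0]  # rows in the precomputed table
harmful_tokens = harmful_tokens[:, :rotary_len]
harmless_tokens = harmless_tokens[:, :rotary_len]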
# harmful_tokens: torch.Size([128, 4135])
for i in tqdm(range(num_batches)):
    print(i)
    start_idx = i * batch_size
    end_idx = min(n_inst_train, start_idx + batch_size)
    # Run the model on harmful and harmless prompts, caching residual-stream activations
    harmful_logits, harmful_cache = model.run_with_cache(
        harmful_tokens[start_idx:end_idx],
        names_filter=lambda hook_name: 'resid' in hook_name,
        device='cpu',
        reset_hooks_end=True
    )
    harmless_logits, harmless_cache = model.run_with_cache(
        harmless_tokens[start_idx:end_idx],
        names_filter=lambda hook_name: 'resid' in hook_name,
        device='cpu',
        reset_hooks_end=True
    )
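For what it's worth, a device-side "index out of bounds" like the one below can also come from the embedding lookup when a token id falls outside the vocabulary, so a quick pre-flight check helps tell the two causes apart. A sketch using only names from the snippets above:

# Sanity checks before the forward pass; if either assert fails, the
# CUDA assert below is explained without touching the rotary tables.
for toks in (harmful_tokens, harmless_tokens):
    assert toks.max().item() < model.cfg.d_vocab, "token id exceeds vocab size"
    assert toks.min().item() >= 0, "negative token id"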
../aten/src/ATen/native/cuda/IndexKernel.cu:93: operator(): block: [1296,0,0], thread: [52,0,0] Assertion `-sizes[i] <= index && index < sizes[i] && "index out of bounds"` failed.
(the same assertion repeats for threads [53,0,0] through [63,0,0])
0%| | 0/8 [00:02<?, ?it/s]
Traceback (most recent call last):
File "/llm-align/lxw/work/Refusal-LLMs/analyze_refusal.py", line 175, in
harmful_logits, harmful_cache = model.run_with_cache(
File "/llm-align/liujunchen/liujunchen/env/glm-130b-env/miniconda3/envs/glm/envs/refusal_llm/lib/python3.10/site-packages/transformer_lens/HookedTransformer.py", line 657, in run_with_cache
out, cache_dict = super().run_with_cache(
File "/llm-align/liujunchen/liujunchen/env/glm-130b-env/miniconda3/envs/glm/envs/refusal_llm/lib/python3.10/site-packages/transformer_lens/hook_points.py", line 568, in run_with_cache
model_out = self(*model_args, **model_kwargs)
File "/llm-align/liujunchen/liujunchen/env/glm-130b-env/miniconda3/envs/glm/envs/refusal_llm/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/llm-align/liujunchen/liujunchen/env/glm-130b-env/miniconda3/envs/glm/envs/refusal_llm/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "/llm-align/liujunchen/liujunchen/env/glm-130b-env/miniconda3/envs/glm/envs/refusal_llm/lib/python3.10/site-packages/transformer_lens/HookedTransformer.py", line 575, in forward
residual = block(
File "/llm-align/liujunchen/liujunchen/env/glm-130b-env/miniconda3/envs/glm/envs/refusal_llm/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/llm-align/liujunchen/liujunchen/env/glm-130b-env/miniconda3/envs/glm/envs/refusal_llm/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "/llm-align/liujunchen/liujunchen/env/glm-130b-env/miniconda3/envs/glm/envs/refusal_llm/lib/python3.10/site-packages/transformer_lens/components/transformer_block.py", line 160, in forward
self.attn(
File "/llm-align/liujunchen/liujunchen/env/glm-130b-env/miniconda3/envs/glm/envs/refusal_llm/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/llm-align/liujunchen/liujunchen/env/glm-130b-env/miniconda3/envs/glm/envs/refusal_llm/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "/llm-align/liujunchen/liujunchen/env/glm-130b-env/miniconda3/envs/glm/envs/refusal_llm/lib/python3.10/site-packages/transformer_lens/components/abstract_attention.py", line 207, in forward
q = self.hook_rot_q(self.apply_rotary(q, kv_cache_pos_offset, attention_mask))
File "/llm-align/liujunchen/liujunchen/env/glm-130b-env/miniconda3/envs/glm/envs/refusal_llm/lib/python3.10/site-packages/transformer_lens/components/abstract_attention.py", line 575, in apply_rotary
return torch.cat([x_rotated, x_pass], dim=-1)
RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with TORCH_USE_CUDA_DSA to enable device-side assertions
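As the message says, device-side asserts surface asynchronously, so the Python traceback may not point at the op that actually failed. Two ways to pin down the exact location, sketched under the assumption that a single batch reproduces the failure (the env var must be set before torch initializes CUDA):

import os
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"  # set before the first CUDA call so kernels launch synchronously
import torch

# Alternatively, replay one failing batch on CPU: out-of-range indexing
# then raises an ordinary IndexError with a precise stack trace instead
# of an asynchronous device-side assert.
model_cpu = model.to("cpu")
_ = model_cpu(harmful_tokens[:1].to("cpu"))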